program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.8.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})] { func main(tensor causal_mask, tensor input_ids, state> kv_cache_0, tensor position_ids, tensor update_mask) { tensor sin_full = const()[name = string("sin_full"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; tensor cos_full = const()[name = string("cos_full"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2097280)))]; tensor sin_sliding = const()[name = string("sin_sliding"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4194496)))]; tensor cos_sliding = const()[name = string("cos_sliding"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6291712)))]; tensor layers_0_self_attn_q_proj_weight = const()[name = string("layers_0_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8388928)))]; tensor layers_0_self_attn_k_proj_weight = const()[name = string("layers_0_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9699712)))]; tensor layers_0_self_attn_v_proj_weight = const()[name = string("layers_0_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10027456)))]; tensor layers_0_mlp_gate_proj_weight = const()[name = string("layers_0_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10355200)))]; tensor layers_0_mlp_up_proj_weight = const()[name = string("layers_0_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12976704)))]; tensor layers_0_mlp_down_proj_weight = const()[name = string("layers_0_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15598208)))]; tensor layers_1_self_attn_q_proj_weight = const()[name = string("layers_1_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18219712)))]; tensor layers_1_self_attn_k_proj_weight = const()[name = string("layers_1_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19530496)))]; tensor layers_1_self_attn_v_proj_weight = const()[name = string("layers_1_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19858240)))]; tensor layers_1_mlp_gate_proj_weight = const()[name = string("layers_1_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20185984)))]; tensor layers_1_mlp_up_proj_weight = const()[name = string("layers_1_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22807488)))]; tensor layers_1_mlp_down_proj_weight = const()[name = string("layers_1_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25428992)))]; tensor layers_2_self_attn_q_proj_weight = const()[name = string("layers_2_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28050496)))]; tensor layers_2_self_attn_k_proj_weight = const()[name = string("layers_2_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29361280)))]; tensor layers_2_self_attn_v_proj_weight = const()[name = string("layers_2_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29689024)))]; tensor layers_2_mlp_gate_proj_weight = const()[name = string("layers_2_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30016768)))]; tensor layers_2_mlp_up_proj_weight = const()[name = string("layers_2_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32638272)))]; tensor layers_2_mlp_down_proj_weight = const()[name = string("layers_2_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35259776)))]; tensor layers_3_self_attn_q_proj_weight = const()[name = string("layers_3_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37881280)))]; tensor layers_3_self_attn_k_proj_weight = const()[name = string("layers_3_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39192064)))]; tensor layers_3_self_attn_v_proj_weight = const()[name = string("layers_3_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39519808)))]; tensor layers_3_mlp_gate_proj_weight = const()[name = string("layers_3_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39847552)))]; tensor layers_3_mlp_up_proj_weight = const()[name = string("layers_3_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42469056)))]; tensor layers_3_mlp_down_proj_weight = const()[name = string("layers_3_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45090560)))]; tensor layers_4_self_attn_q_proj_weight = const()[name = string("layers_4_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47712064)))]; tensor layers_4_self_attn_k_proj_weight = const()[name = string("layers_4_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49022848)))]; tensor layers_4_self_attn_v_proj_weight = const()[name = string("layers_4_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49350592)))]; tensor layers_4_mlp_gate_proj_weight = const()[name = string("layers_4_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49678336)))]; tensor layers_4_mlp_up_proj_weight = const()[name = string("layers_4_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52299840)))]; tensor layers_4_mlp_down_proj_weight = const()[name = string("layers_4_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54921344)))]; tensor layers_5_self_attn_q_proj_weight = const()[name = string("layers_5_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57542848)))]; tensor layers_5_self_attn_k_proj_weight = const()[name = string("layers_5_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58853632)))]; tensor layers_5_self_attn_v_proj_weight = const()[name = string("layers_5_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59181376)))]; tensor layers_5_mlp_gate_proj_weight = const()[name = string("layers_5_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59509120)))]; tensor layers_5_mlp_up_proj_weight = const()[name = string("layers_5_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62130624)))]; tensor layers_5_mlp_down_proj_weight = const()[name = string("layers_5_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64752128)))]; tensor layers_6_self_attn_q_proj_weight = const()[name = string("layers_6_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67373632)))]; tensor layers_6_self_attn_k_proj_weight = const()[name = string("layers_6_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68684416)))]; tensor layers_6_self_attn_v_proj_weight = const()[name = string("layers_6_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69012160)))]; tensor layers_6_mlp_gate_proj_weight = const()[name = string("layers_6_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69339904)))]; tensor layers_6_mlp_up_proj_weight = const()[name = string("layers_6_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71961408)))]; tensor layers_6_mlp_down_proj_weight = const()[name = string("layers_6_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74582912)))]; tensor layers_7_self_attn_q_proj_weight = const()[name = string("layers_7_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77204416)))]; tensor layers_7_self_attn_k_proj_weight = const()[name = string("layers_7_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78515200)))]; tensor layers_7_self_attn_v_proj_weight = const()[name = string("layers_7_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78842944)))]; tensor layers_7_mlp_gate_proj_weight = const()[name = string("layers_7_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79170688)))]; tensor layers_7_mlp_up_proj_weight = const()[name = string("layers_7_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81792192)))]; tensor layers_7_mlp_down_proj_weight = const()[name = string("layers_7_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84413696)))]; tensor layers_8_self_attn_q_proj_weight = const()[name = string("layers_8_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87035200)))]; tensor layers_8_self_attn_k_proj_weight = const()[name = string("layers_8_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88345984)))]; tensor layers_8_self_attn_v_proj_weight = const()[name = string("layers_8_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88673728)))]; tensor layers_8_mlp_gate_proj_weight = const()[name = string("layers_8_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89001472)))]; tensor layers_8_mlp_up_proj_weight = const()[name = string("layers_8_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91622976)))]; tensor layers_8_mlp_down_proj_weight = const()[name = string("layers_8_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94244480)))]; tensor layers_9_self_attn_q_proj_weight = const()[name = string("layers_9_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96865984)))]; tensor layers_9_self_attn_k_proj_weight = const()[name = string("layers_9_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98176768)))]; tensor layers_9_self_attn_v_proj_weight = const()[name = string("layers_9_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98504512)))]; tensor layers_9_mlp_gate_proj_weight = const()[name = string("layers_9_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98832256)))]; tensor layers_9_mlp_up_proj_weight = const()[name = string("layers_9_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101453760)))]; tensor layers_9_mlp_down_proj_weight = const()[name = string("layers_9_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104075264)))]; tensor layers_10_self_attn_q_proj_weight = const()[name = string("layers_10_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106696768)))]; tensor layers_10_self_attn_k_proj_weight = const()[name = string("layers_10_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108007552)))]; tensor layers_10_self_attn_v_proj_weight = const()[name = string("layers_10_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108335296)))]; tensor layers_10_mlp_gate_proj_weight = const()[name = string("layers_10_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108663040)))]; tensor layers_10_mlp_up_proj_weight = const()[name = string("layers_10_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111284544)))]; tensor layers_10_mlp_down_proj_weight = const()[name = string("layers_10_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113906048)))]; tensor layers_11_self_attn_q_proj_weight = const()[name = string("layers_11_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116527552)))]; tensor layers_11_self_attn_k_proj_weight = const()[name = string("layers_11_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117838336)))]; tensor layers_11_self_attn_v_proj_weight = const()[name = string("layers_11_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118166080)))]; tensor layers_11_mlp_gate_proj_weight = const()[name = string("layers_11_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118493824)))]; tensor layers_11_mlp_up_proj_weight = const()[name = string("layers_11_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121115328)))]; tensor layers_11_mlp_down_proj_weight = const()[name = string("layers_11_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123736832)))]; tensor layers_12_self_attn_q_proj_weight = const()[name = string("layers_12_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126358336)))]; tensor layers_12_self_attn_k_proj_weight = const()[name = string("layers_12_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127669120)))]; tensor layers_12_self_attn_v_proj_weight = const()[name = string("layers_12_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127996864)))]; tensor layers_12_mlp_gate_proj_weight = const()[name = string("layers_12_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128324608)))]; tensor layers_12_mlp_up_proj_weight = const()[name = string("layers_12_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130946112)))]; tensor layers_12_mlp_down_proj_weight = const()[name = string("layers_12_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133567616)))]; tensor layers_13_self_attn_q_proj_weight = const()[name = string("layers_13_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136189120)))]; tensor layers_13_self_attn_k_proj_weight = const()[name = string("layers_13_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137499904)))]; tensor layers_13_self_attn_v_proj_weight = const()[name = string("layers_13_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137827648)))]; tensor layers_13_mlp_gate_proj_weight = const()[name = string("layers_13_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138155392)))]; tensor layers_13_mlp_up_proj_weight = const()[name = string("layers_13_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140776896)))]; tensor layers_13_mlp_down_proj_weight = const()[name = string("layers_13_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143398400)))]; tensor layers_14_self_attn_q_proj_weight = const()[name = string("layers_14_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146019904)))]; tensor layers_14_self_attn_k_proj_weight = const()[name = string("layers_14_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147330688)))]; tensor layers_14_self_attn_v_proj_weight = const()[name = string("layers_14_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147658432)))]; tensor layers_14_mlp_gate_proj_weight = const()[name = string("layers_14_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147986176)))]; tensor layers_14_mlp_up_proj_weight = const()[name = string("layers_14_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150607680)))]; tensor layers_14_mlp_down_proj_weight = const()[name = string("layers_14_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153229184)))]; tensor layers_15_self_attn_q_proj_weight = const()[name = string("layers_15_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155850688)))]; tensor layers_15_self_attn_k_proj_weight = const()[name = string("layers_15_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157161472)))]; tensor layers_15_self_attn_v_proj_weight = const()[name = string("layers_15_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157489216)))]; tensor layers_15_mlp_gate_proj_weight = const()[name = string("layers_15_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157816960)))]; tensor layers_15_mlp_up_proj_weight = const()[name = string("layers_15_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160438464)))]; tensor layers_15_mlp_down_proj_weight = const()[name = string("layers_15_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163059968)))]; tensor layers_16_self_attn_q_proj_weight = const()[name = string("layers_16_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165681472)))]; tensor layers_16_self_attn_k_proj_weight = const()[name = string("layers_16_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166992256)))]; tensor layers_16_self_attn_v_proj_weight = const()[name = string("layers_16_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167320000)))]; tensor layers_16_mlp_gate_proj_weight = const()[name = string("layers_16_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167647744)))]; tensor layers_16_mlp_up_proj_weight = const()[name = string("layers_16_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170269248)))]; tensor layers_16_mlp_down_proj_weight = const()[name = string("layers_16_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172890752)))]; tensor layers_17_self_attn_q_proj_weight = const()[name = string("layers_17_self_attn_q_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175512256)))]; tensor layers_17_self_attn_k_proj_weight = const()[name = string("layers_17_self_attn_k_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176823040)))]; tensor layers_17_self_attn_v_proj_weight = const()[name = string("layers_17_self_attn_v_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177150784)))]; tensor layers_17_mlp_gate_proj_weight = const()[name = string("layers_17_mlp_gate_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177478528)))]; tensor layers_17_mlp_up_proj_weight = const()[name = string("layers_17_mlp_up_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180100032)))]; tensor layers_17_mlp_down_proj_weight = const()[name = string("layers_17_mlp_down_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182721536)))]; int32 var_880_batch_dims_0 = const()[name = string("op_880_batch_dims_0"), val = int32(0)]; bool var_880_validate_indices_0 = const()[name = string("op_880_validate_indices_0"), val = bool(false)]; tensor embed_tokens_weight_to_fp16 = const()[name = string("embed_tokens_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185343040)))]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = input_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(262144)]; tensor add_0 = add(x = input_ids, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = input_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 var_880_cast_fp16_axis_0 = const()[name = string("op_880_cast_fp16_axis_0"), val = int32(0)]; tensor var_880_cast_fp16 = gather(axis = var_880_cast_fp16_axis_0, batch_dims = var_880_batch_dims_0, indices = select_0, validate_indices = var_880_validate_indices_0, x = embed_tokens_weight_to_fp16)[name = string("op_880_cast_fp16")]; fp16 var_886_to_fp16 = const()[name = string("op_886_to_fp16"), val = fp16(0x1.94cp+4)]; tensor x_1_cast_fp16 = mul(x = var_880_cast_fp16, y = var_886_to_fp16)[name = string("x_1_cast_fp16")]; int32 var_888 = const()[name = string("op_888"), val = int32(0)]; int32 var_889_batch_dims_0 = const()[name = string("op_889_batch_dims_0"), val = int32(0)]; bool var_889_validate_indices_0 = const()[name = string("op_889_validate_indices_0"), val = bool(false)]; string position_ids_to_uint16_dtype_0 = const()[name = string("position_ids_to_uint16_dtype_0"), val = string("uint16")]; tensor position_ids_to_uint16 = cast(dtype = position_ids_to_uint16_dtype_0, x = position_ids)[name = string("cast_220")]; tensor var_889_cast_uint16 = gather(axis = var_888, batch_dims = var_889_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_889_validate_indices_0, x = cos_sliding)[name = string("op_889_cast_uint16")]; tensor var_891_axes_0 = const()[name = string("op_891_axes_0"), val = tensor([0])]; tensor var_891 = expand_dims(axes = var_891_axes_0, x = var_889_cast_uint16)[name = string("op_891")]; tensor cos_1_axes_0 = const()[name = string("cos_1_axes_0"), val = tensor([0])]; tensor cos_1 = expand_dims(axes = cos_1_axes_0, x = var_891)[name = string("cos_1")]; int32 var_894 = const()[name = string("op_894"), val = int32(0)]; int32 var_895_batch_dims_0 = const()[name = string("op_895_batch_dims_0"), val = int32(0)]; bool var_895_validate_indices_0 = const()[name = string("op_895_validate_indices_0"), val = bool(false)]; tensor var_895_cast_uint16 = gather(axis = var_894, batch_dims = var_895_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_895_validate_indices_0, x = sin_sliding)[name = string("op_895_cast_uint16")]; tensor var_897_axes_0 = const()[name = string("op_897_axes_0"), val = tensor([0])]; tensor var_897 = expand_dims(axes = var_897_axes_0, x = var_895_cast_uint16)[name = string("op_897")]; tensor sin_1_axes_0 = const()[name = string("sin_1_axes_0"), val = tensor([0])]; tensor sin_1 = expand_dims(axes = sin_1_axes_0, x = var_897)[name = string("sin_1")]; int32 var_900 = const()[name = string("op_900"), val = int32(0)]; int32 var_901_batch_dims_0 = const()[name = string("op_901_batch_dims_0"), val = int32(0)]; bool var_901_validate_indices_0 = const()[name = string("op_901_validate_indices_0"), val = bool(false)]; tensor var_901_cast_uint16 = gather(axis = var_900, batch_dims = var_901_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_901_validate_indices_0, x = cos_full)[name = string("op_901_cast_uint16")]; tensor var_903_axes_0 = const()[name = string("op_903_axes_0"), val = tensor([0])]; tensor var_903 = expand_dims(axes = var_903_axes_0, x = var_901_cast_uint16)[name = string("op_903")]; tensor cos_axes_0 = const()[name = string("cos_axes_0"), val = tensor([0])]; tensor cos = expand_dims(axes = cos_axes_0, x = var_903)[name = string("cos")]; int32 var_906 = const()[name = string("op_906"), val = int32(0)]; int32 var_907_batch_dims_0 = const()[name = string("op_907_batch_dims_0"), val = int32(0)]; bool var_907_validate_indices_0 = const()[name = string("op_907_validate_indices_0"), val = bool(false)]; tensor var_907_cast_uint16 = gather(axis = var_906, batch_dims = var_907_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_907_validate_indices_0, x = sin_full)[name = string("op_907_cast_uint16")]; tensor var_909_axes_0 = const()[name = string("op_909_axes_0"), val = tensor([0])]; tensor var_909 = expand_dims(axes = var_909_axes_0, x = var_907_cast_uint16)[name = string("op_909")]; tensor sin_axes_0 = const()[name = string("sin_axes_0"), val = tensor([0])]; tensor sin = expand_dims(axes = sin_axes_0, x = var_909)[name = string("sin")]; int32 var_916 = const()[name = string("op_916"), val = int32(-1)]; fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_922_cast_fp16 = mul(x = x_1_cast_fp16, y = const_0_promoted_to_fp16)[name = string("op_922_cast_fp16")]; bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; tensor input_1_cast_fp16 = concat(axis = var_916, interleave = input_1_interleave_0, values = (x_1_cast_fp16, var_922_cast_fp16))[name = string("input_1_cast_fp16")]; tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; fp16 var_914_to_fp16 = const()[name = string("op_914_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_914_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; tensor var_927_split_sizes_0 = const()[name = string("op_927_split_sizes_0"), val = tensor([640, 640])]; int32 var_927_axis_0 = const()[name = string("op_927_axis_0"), val = int32(-1)]; tensor var_927_cast_fp16_0, tensor var_927_cast_fp16_1 = split(axis = var_927_axis_0, split_sizes = var_927_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_927_cast_fp16")]; tensor var_931_to_fp16 = const()[name = string("op_931_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(520887424)))]; tensor out_1_cast_fp16 = mul(x = var_927_cast_fp16_0, y = var_931_to_fp16)[name = string("out_1_cast_fp16")]; tensor var_945 = const()[name = string("op_945"), val = tensor([0, 2, 1])]; tensor input_3_axes_0 = const()[name = string("input_3_axes_0"), val = tensor([2])]; tensor var_946 = transpose(perm = var_945, x = out_1_cast_fp16)[name = string("transpose_163")]; tensor input_3 = expand_dims(axes = input_3_axes_0, x = var_946)[name = string("input_3")]; string var_959_pad_type_0 = const()[name = string("op_959_pad_type_0"), val = string("valid")]; tensor var_959_strides_0 = const()[name = string("op_959_strides_0"), val = tensor([1, 1])]; tensor var_959_pad_0 = const()[name = string("op_959_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_959_dilations_0 = const()[name = string("op_959_dilations_0"), val = tensor([1, 1])]; int32 var_959_groups_0 = const()[name = string("op_959_groups_0"), val = int32(1)]; tensor var_959 = conv(dilations = var_959_dilations_0, groups = var_959_groups_0, pad = var_959_pad_0, pad_type = var_959_pad_type_0, strides = var_959_strides_0, weight = layers_0_self_attn_q_proj_weight, x = input_3)[name = string("op_959")]; tensor var_964 = const()[name = string("op_964"), val = tensor([1, 4, 256, 1])]; tensor var_965 = reshape(shape = var_964, x = var_959)[name = string("op_965")]; tensor var_970 = const()[name = string("op_970"), val = tensor([0, 1, 3, 2])]; tensor var_975 = const()[name = string("op_975"), val = tensor([1, 4, 256])]; tensor q_1 = transpose(perm = var_970, x = var_965)[name = string("transpose_162")]; tensor x_5 = reshape(shape = var_975, x = q_1)[name = string("x_5")]; int32 var_982 = const()[name = string("op_982"), val = int32(-1)]; fp16 const_2_promoted_to_fp16 = const()[name = string("const_2_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_988_cast_fp16 = mul(x = x_5, y = const_2_promoted_to_fp16)[name = string("op_988_cast_fp16")]; bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; tensor input_5_cast_fp16 = concat(axis = var_982, interleave = input_5_interleave_0, values = (x_5, var_988_cast_fp16))[name = string("input_5_cast_fp16")]; tensor normed_7_axes_0 = const()[name = string("normed_7_axes_0"), val = tensor([-1])]; fp16 var_980_to_fp16 = const()[name = string("op_980_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_7_cast_fp16 = layer_norm(axes = normed_7_axes_0, epsilon = var_980_to_fp16, x = input_5_cast_fp16)[name = string("normed_7_cast_fp16")]; tensor var_993_split_sizes_0 = const()[name = string("op_993_split_sizes_0"), val = tensor([256, 256])]; int32 var_993_axis_0 = const()[name = string("op_993_axis_0"), val = int32(-1)]; tensor var_993_cast_fp16_0, tensor var_993_cast_fp16_1 = split(axis = var_993_axis_0, split_sizes = var_993_split_sizes_0, x = normed_7_cast_fp16)[name = string("op_993_cast_fp16")]; tensor var_997_to_fp16 = const()[name = string("op_997_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(520888768)))]; tensor out_3_cast_fp16 = mul(x = var_993_cast_fp16_0, y = var_997_to_fp16)[name = string("out_3_cast_fp16")]; tensor var_1004 = const()[name = string("op_1004"), val = tensor([1, 4, 1, 256])]; tensor q_3 = reshape(shape = var_1004, x = out_3_cast_fp16)[name = string("q_3")]; string var_1016_pad_type_0 = const()[name = string("op_1016_pad_type_0"), val = string("valid")]; tensor var_1016_strides_0 = const()[name = string("op_1016_strides_0"), val = tensor([1, 1])]; tensor var_1016_pad_0 = const()[name = string("op_1016_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1016_dilations_0 = const()[name = string("op_1016_dilations_0"), val = tensor([1, 1])]; int32 var_1016_groups_0 = const()[name = string("op_1016_groups_0"), val = int32(1)]; tensor var_1016 = conv(dilations = var_1016_dilations_0, groups = var_1016_groups_0, pad = var_1016_pad_0, pad_type = var_1016_pad_type_0, strides = var_1016_strides_0, weight = layers_0_self_attn_k_proj_weight, x = input_3)[name = string("op_1016")]; tensor var_1021 = const()[name = string("op_1021"), val = tensor([1, 1, 256, 1])]; tensor var_1022 = reshape(shape = var_1021, x = var_1016)[name = string("op_1022")]; tensor var_1027 = const()[name = string("op_1027"), val = tensor([0, 1, 3, 2])]; tensor var_1032 = const()[name = string("op_1032"), val = tensor([1, 1, 256])]; tensor k_1 = transpose(perm = var_1027, x = var_1022)[name = string("transpose_161")]; tensor x_7 = reshape(shape = var_1032, x = k_1)[name = string("x_7")]; int32 var_1039 = const()[name = string("op_1039"), val = int32(-1)]; fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1045_cast_fp16 = mul(x = x_7, y = const_4_promoted_to_fp16)[name = string("op_1045_cast_fp16")]; bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; tensor input_7_cast_fp16 = concat(axis = var_1039, interleave = input_7_interleave_0, values = (x_7, var_1045_cast_fp16))[name = string("input_7_cast_fp16")]; tensor normed_11_axes_0 = const()[name = string("normed_11_axes_0"), val = tensor([-1])]; fp16 var_1037_to_fp16 = const()[name = string("op_1037_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_11_cast_fp16 = layer_norm(axes = normed_11_axes_0, epsilon = var_1037_to_fp16, x = input_7_cast_fp16)[name = string("normed_11_cast_fp16")]; tensor var_1050_split_sizes_0 = const()[name = string("op_1050_split_sizes_0"), val = tensor([256, 256])]; int32 var_1050_axis_0 = const()[name = string("op_1050_axis_0"), val = int32(-1)]; tensor var_1050_cast_fp16_0, tensor var_1050_cast_fp16_1 = split(axis = var_1050_axis_0, split_sizes = var_1050_split_sizes_0, x = normed_11_cast_fp16)[name = string("op_1050_cast_fp16")]; tensor var_1054_to_fp16 = const()[name = string("op_1054_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(520889344)))]; tensor out_5_cast_fp16 = mul(x = var_1050_cast_fp16_0, y = var_1054_to_fp16)[name = string("out_5_cast_fp16")]; tensor var_1061 = const()[name = string("op_1061"), val = tensor([1, 1, 1, 256])]; tensor k_3 = reshape(shape = var_1061, x = out_5_cast_fp16)[name = string("k_3")]; string var_1073_pad_type_0 = const()[name = string("op_1073_pad_type_0"), val = string("valid")]; tensor var_1073_strides_0 = const()[name = string("op_1073_strides_0"), val = tensor([1, 1])]; tensor var_1073_pad_0 = const()[name = string("op_1073_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1073_dilations_0 = const()[name = string("op_1073_dilations_0"), val = tensor([1, 1])]; int32 var_1073_groups_0 = const()[name = string("op_1073_groups_0"), val = int32(1)]; tensor var_1073 = conv(dilations = var_1073_dilations_0, groups = var_1073_groups_0, pad = var_1073_pad_0, pad_type = var_1073_pad_type_0, strides = var_1073_strides_0, weight = layers_0_self_attn_v_proj_weight, x = input_3)[name = string("op_1073")]; tensor var_1078 = const()[name = string("op_1078"), val = tensor([1, 1, 256, 1])]; tensor var_1079 = reshape(shape = var_1078, x = var_1073)[name = string("op_1079")]; tensor var_1084 = const()[name = string("op_1084"), val = tensor([0, 1, 3, 2])]; tensor var_1086 = mul(x = q_3, y = cos_1)[name = string("op_1086")]; tensor var_1087_split_sizes_0 = const()[name = string("op_1087_split_sizes_0"), val = tensor([128, 128])]; int32 var_1087_axis_0 = const()[name = string("op_1087_axis_0"), val = int32(-1)]; tensor var_1087_0, tensor var_1087_1 = split(axis = var_1087_axis_0, split_sizes = var_1087_split_sizes_0, x = q_3)[name = string("op_1087")]; fp16 const_6_promoted = const()[name = string("const_6_promoted"), val = fp16(-0x1p+0)]; tensor var_1089 = mul(x = var_1087_1, y = const_6_promoted)[name = string("op_1089")]; int32 var_1091 = const()[name = string("op_1091"), val = int32(-1)]; bool var_1092_interleave_0 = const()[name = string("op_1092_interleave_0"), val = bool(false)]; tensor var_1092 = concat(axis = var_1091, interleave = var_1092_interleave_0, values = (var_1089, var_1087_0))[name = string("op_1092")]; tensor var_1093 = mul(x = var_1092, y = sin_1)[name = string("op_1093")]; tensor q_5 = add(x = var_1086, y = var_1093)[name = string("q_5")]; tensor var_1096 = mul(x = k_3, y = cos_1)[name = string("op_1096")]; tensor var_1097_split_sizes_0 = const()[name = string("op_1097_split_sizes_0"), val = tensor([128, 128])]; int32 var_1097_axis_0 = const()[name = string("op_1097_axis_0"), val = int32(-1)]; tensor var_1097_0, tensor var_1097_1 = split(axis = var_1097_axis_0, split_sizes = var_1097_split_sizes_0, x = k_3)[name = string("op_1097")]; fp16 const_7_promoted = const()[name = string("const_7_promoted"), val = fp16(-0x1p+0)]; tensor var_1099 = mul(x = var_1097_1, y = const_7_promoted)[name = string("op_1099")]; int32 var_1101 = const()[name = string("op_1101"), val = int32(-1)]; bool var_1102_interleave_0 = const()[name = string("op_1102_interleave_0"), val = bool(false)]; tensor var_1102 = concat(axis = var_1101, interleave = var_1102_interleave_0, values = (var_1099, var_1097_0))[name = string("op_1102")]; tensor var_1103 = mul(x = var_1102, y = sin_1)[name = string("op_1103")]; tensor k_5 = add(x = var_1096, y = var_1103)[name = string("k_5")]; tensor read_state_0 = read_state(input = kv_cache_0)[name = string("read_state_0")]; tensor var_1108_begin_0 = const()[name = string("op_1108_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1108_end_0 = const()[name = string("op_1108_end_0"), val = tensor([1, 1, 2048, 256])]; tensor var_1108_end_mask_0 = const()[name = string("op_1108_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1108_squeeze_mask_0 = const()[name = string("op_1108_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_1108_cast_fp16 = slice_by_index(begin = var_1108_begin_0, end = var_1108_end_0, end_mask = var_1108_end_mask_0, squeeze_mask = var_1108_squeeze_mask_0, x = read_state_0)[name = string("op_1108_cast_fp16")]; tensor K_cache_1_axes_0 = const()[name = string("K_cache_1_axes_0"), val = tensor([0])]; tensor K_cache_1_cast_fp16 = expand_dims(axes = K_cache_1_axes_0, x = var_1108_cast_fp16)[name = string("K_cache_1_cast_fp16")]; tensor var_1113_begin_0 = const()[name = string("op_1113_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_1113_end_0 = const()[name = string("op_1113_end_0"), val = tensor([19, 1, 2048, 256])]; tensor var_1113_end_mask_0 = const()[name = string("op_1113_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1113_squeeze_mask_0 = const()[name = string("op_1113_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_1113_cast_fp16 = slice_by_index(begin = var_1113_begin_0, end = var_1113_end_0, end_mask = var_1113_end_mask_0, squeeze_mask = var_1113_squeeze_mask_0, x = read_state_0)[name = string("op_1113_cast_fp16")]; tensor V_cache_1_axes_0 = const()[name = string("V_cache_1_axes_0"), val = tensor([0])]; tensor V_cache_1_cast_fp16 = expand_dims(axes = V_cache_1_axes_0, x = var_1113_cast_fp16)[name = string("V_cache_1_cast_fp16")]; tensor k_broadcast_1_reps_0 = const()[name = string("k_broadcast_1_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_1 = tile(reps = k_broadcast_1_reps_0, x = k_5)[name = string("k_broadcast_1")]; tensor v_broadcast_1_reps_0 = const()[name = string("v_broadcast_1_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_1 = transpose(perm = var_1084, x = var_1079)[name = string("transpose_160")]; tensor v_broadcast_1 = tile(reps = v_broadcast_1_reps_0, x = v_1)[name = string("v_broadcast_1")]; fp16 var_1118_promoted_to_fp16 = const()[name = string("op_1118_promoted_to_fp16"), val = fp16(0x1p+0)]; tensor var_1120_cast_fp16 = sub(x = var_1118_promoted_to_fp16, y = update_mask)[name = string("op_1120_cast_fp16")]; tensor var_1121_cast_fp16 = mul(x = K_cache_1_cast_fp16, y = var_1120_cast_fp16)[name = string("op_1121_cast_fp16")]; tensor var_1122_cast_fp16 = mul(x = k_broadcast_1, y = update_mask)[name = string("op_1122_cast_fp16")]; tensor K_new_1_cast_fp16 = add(x = var_1121_cast_fp16, y = var_1122_cast_fp16)[name = string("K_new_1_cast_fp16")]; tensor var_1128_cast_fp16 = mul(x = V_cache_1_cast_fp16, y = var_1120_cast_fp16)[name = string("op_1128_cast_fp16")]; tensor var_1129_cast_fp16 = mul(x = v_broadcast_1, y = update_mask)[name = string("op_1129_cast_fp16")]; tensor V_new_1_cast_fp16 = add(x = var_1128_cast_fp16, y = var_1129_cast_fp16)[name = string("V_new_1_cast_fp16")]; tensor var_1133_axes_0 = const()[name = string("op_1133_axes_0"), val = tensor([0])]; tensor var_1133_cast_fp16 = squeeze(axes = var_1133_axes_0, x = K_new_1_cast_fp16)[name = string("op_1133_cast_fp16")]; tensor concat_0 = const()[name = string("concat_0"), val = tensor([0, 0, 0, 0])]; tensor concat_1 = const()[name = string("concat_1"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_1_stride_0, update = var_1133_cast_fp16, x = read_state_0)[name = string("kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_1_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_36_write_state")]; tensor coreml_update_state_36 = read_state(input = kv_cache_0)[name = string("coreml_update_state_36")]; tensor var_1140_axes_0 = const()[name = string("op_1140_axes_0"), val = tensor([0])]; tensor var_1140_cast_fp16 = squeeze(axes = var_1140_axes_0, x = V_new_1_cast_fp16)[name = string("op_1140_cast_fp16")]; tensor concat_2 = const()[name = string("concat_2"), val = tensor([18, 0, 0, 0])]; tensor concat_3 = const()[name = string("concat_3"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_2, begin_mask = kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_3, end_mask = kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_2_stride_0, update = var_1140_cast_fp16, x = coreml_update_state_36)[name = string("kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_2_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_37_write_state")]; tensor coreml_update_state_37 = read_state(input = kv_cache_0)[name = string("coreml_update_state_37")]; tensor hidden_states_3_axes_0 = const()[name = string("hidden_states_3_axes_0"), val = tensor([2])]; tensor hidden_states_3_cast_fp16 = expand_dims(axes = hidden_states_3_axes_0, x = K_new_1_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; tensor var_1153 = const()[name = string("op_1153"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_5_cast_fp16 = tile(reps = var_1153, x = hidden_states_3_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor var_1159 = const()[name = string("op_1159"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_1_cast_fp16 = reshape(shape = var_1159, x = hidden_states_5_cast_fp16)[name = string("K_expanded_1_cast_fp16")]; tensor hidden_states_7_axes_0 = const()[name = string("hidden_states_7_axes_0"), val = tensor([2])]; tensor hidden_states_7_cast_fp16 = expand_dims(axes = hidden_states_7_axes_0, x = V_new_1_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; tensor var_1168 = const()[name = string("op_1168"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_9_cast_fp16 = tile(reps = var_1168, x = hidden_states_7_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; tensor var_1174 = const()[name = string("op_1174"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_1_cast_fp16 = reshape(shape = var_1174, x = hidden_states_9_cast_fp16)[name = string("V_expanded_1_cast_fp16")]; bool var_1189_transpose_x_1 = const()[name = string("op_1189_transpose_x_1"), val = bool(false)]; bool var_1189_transpose_y_1 = const()[name = string("op_1189_transpose_y_1"), val = bool(true)]; tensor var_1189_cast_fp16 = matmul(transpose_x = var_1189_transpose_x_1, transpose_y = var_1189_transpose_y_1, x = q_5, y = K_expanded_1_cast_fp16)[name = string("op_1189_cast_fp16")]; fp16 var_1190_to_fp16 = const()[name = string("op_1190_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_1_cast_fp16 = mul(x = var_1189_cast_fp16, y = var_1190_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("attn_weights_3_cast_fp16")]; int32 var_1199 = const()[name = string("op_1199"), val = int32(-1)]; tensor var_1201_cast_fp16 = softmax(axis = var_1199, x = attn_weights_3_cast_fp16)[name = string("op_1201_cast_fp16")]; bool var_1217_transpose_x_0 = const()[name = string("op_1217_transpose_x_0"), val = bool(false)]; bool var_1217_transpose_y_0 = const()[name = string("op_1217_transpose_y_0"), val = bool(false)]; tensor var_1217_cast_fp16 = matmul(transpose_x = var_1217_transpose_x_0, transpose_y = var_1217_transpose_y_0, x = var_1201_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("op_1217_cast_fp16")]; tensor var_1227 = const()[name = string("op_1227"), val = tensor([0, 2, 1, 3])]; tensor var_1234 = const()[name = string("op_1234"), val = tensor([1, 1, -1])]; tensor var_1228 = transpose(perm = var_1227, x = var_1217_cast_fp16)[name = string("transpose_159")]; tensor attn_output_3 = reshape(shape = var_1234, x = var_1228)[name = string("attn_output_3")]; tensor var_1239 = const()[name = string("op_1239"), val = tensor([0, 2, 1])]; tensor squeeze_0 = const()[name = string("squeeze_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(520889920)))]; string var_1255_pad_type_0 = const()[name = string("op_1255_pad_type_0"), val = string("valid")]; int32 var_1255_groups_0 = const()[name = string("op_1255_groups_0"), val = int32(1)]; tensor var_1255_strides_0 = const()[name = string("op_1255_strides_0"), val = tensor([1])]; tensor var_1255_pad_0 = const()[name = string("op_1255_pad_0"), val = tensor([0, 0])]; tensor var_1255_dilations_0 = const()[name = string("op_1255_dilations_0"), val = tensor([1])]; tensor var_1240 = transpose(perm = var_1239, x = attn_output_3)[name = string("transpose_158")]; tensor var_1255 = conv(dilations = var_1255_dilations_0, groups = var_1255_groups_0, pad = var_1255_pad_0, pad_type = var_1255_pad_type_0, strides = var_1255_strides_0, weight = squeeze_0, x = var_1240)[name = string("op_1255")]; tensor var_1259 = const()[name = string("op_1259"), val = tensor([0, 2, 1])]; int32 var_1266 = const()[name = string("op_1266"), val = int32(-1)]; fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_9 = transpose(perm = var_1259, x = var_1255)[name = string("transpose_157")]; tensor var_1272_cast_fp16 = mul(x = x_9, y = const_8_promoted_to_fp16)[name = string("op_1272_cast_fp16")]; bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; tensor input_11_cast_fp16 = concat(axis = var_1266, interleave = input_11_interleave_0, values = (x_9, var_1272_cast_fp16))[name = string("input_11_cast_fp16")]; tensor normed_15_axes_0 = const()[name = string("normed_15_axes_0"), val = tensor([-1])]; fp16 var_1264_to_fp16 = const()[name = string("op_1264_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_15_cast_fp16 = layer_norm(axes = normed_15_axes_0, epsilon = var_1264_to_fp16, x = input_11_cast_fp16)[name = string("normed_15_cast_fp16")]; tensor var_1277_split_sizes_0 = const()[name = string("op_1277_split_sizes_0"), val = tensor([640, 640])]; int32 var_1277_axis_0 = const()[name = string("op_1277_axis_0"), val = int32(-1)]; tensor var_1277_cast_fp16_0, tensor var_1277_cast_fp16_1 = split(axis = var_1277_axis_0, split_sizes = var_1277_split_sizes_0, x = normed_15_cast_fp16)[name = string("op_1277_cast_fp16")]; tensor var_1281_to_fp16 = const()[name = string("op_1281_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522200704)))]; tensor out_7_cast_fp16 = mul(x = var_1277_cast_fp16_0, y = var_1281_to_fp16)[name = string("out_7_cast_fp16")]; tensor x_11_cast_fp16 = add(x = x_1_cast_fp16, y = out_7_cast_fp16)[name = string("x_11_cast_fp16")]; int32 var_1295 = const()[name = string("op_1295"), val = int32(-1)]; fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1301_cast_fp16 = mul(x = x_11_cast_fp16, y = const_10_promoted_to_fp16)[name = string("op_1301_cast_fp16")]; bool input_13_interleave_0 = const()[name = string("input_13_interleave_0"), val = bool(false)]; tensor input_13_cast_fp16 = concat(axis = var_1295, interleave = input_13_interleave_0, values = (x_11_cast_fp16, var_1301_cast_fp16))[name = string("input_13_cast_fp16")]; tensor normed_19_axes_0 = const()[name = string("normed_19_axes_0"), val = tensor([-1])]; fp16 var_1293_to_fp16 = const()[name = string("op_1293_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_19_cast_fp16 = layer_norm(axes = normed_19_axes_0, epsilon = var_1293_to_fp16, x = input_13_cast_fp16)[name = string("normed_19_cast_fp16")]; tensor var_1306_split_sizes_0 = const()[name = string("op_1306_split_sizes_0"), val = tensor([640, 640])]; int32 var_1306_axis_0 = const()[name = string("op_1306_axis_0"), val = int32(-1)]; tensor var_1306_cast_fp16_0, tensor var_1306_cast_fp16_1 = split(axis = var_1306_axis_0, split_sizes = var_1306_split_sizes_0, x = normed_19_cast_fp16)[name = string("op_1306_cast_fp16")]; tensor var_1310_to_fp16 = const()[name = string("op_1310_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522202048)))]; tensor out_9_cast_fp16 = mul(x = var_1306_cast_fp16_0, y = var_1310_to_fp16)[name = string("out_9_cast_fp16")]; tensor var_1324 = const()[name = string("op_1324"), val = tensor([0, 2, 1])]; tensor input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor([2])]; tensor var_1325 = transpose(perm = var_1324, x = out_9_cast_fp16)[name = string("transpose_156")]; tensor input_15 = expand_dims(axes = input_15_axes_0, x = var_1325)[name = string("input_15")]; string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")]; tensor gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor([1, 1])]; tensor gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor([1, 1])]; int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)]; tensor gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = layers_0_mlp_gate_proj_weight, x = input_15)[name = string("gate_1")]; string up_1_pad_type_0 = const()[name = string("up_1_pad_type_0"), val = string("valid")]; tensor up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor([1, 1])]; tensor up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_1_dilations_0 = const()[name = string("up_1_dilations_0"), val = tensor([1, 1])]; int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)]; tensor up_1 = conv(dilations = up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = up_1_strides_0, weight = layers_0_mlp_up_proj_weight, x = input_15)[name = string("up_1")]; string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")]; tensor input_17 = mul(x = gate_3, y = up_1)[name = string("input_17")]; string mlp_out_1_pad_type_0 = const()[name = string("mlp_out_1_pad_type_0"), val = string("valid")]; tensor mlp_out_1_strides_0 = const()[name = string("mlp_out_1_strides_0"), val = tensor([1, 1])]; tensor mlp_out_1_pad_0 = const()[name = string("mlp_out_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_1_dilations_0 = const()[name = string("mlp_out_1_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_1_groups_0 = const()[name = string("mlp_out_1_groups_0"), val = int32(1)]; tensor mlp_out_1 = conv(dilations = mlp_out_1_dilations_0, groups = mlp_out_1_groups_0, pad = mlp_out_1_pad_0, pad_type = mlp_out_1_pad_type_0, strides = mlp_out_1_strides_0, weight = layers_0_mlp_down_proj_weight, x = input_17)[name = string("mlp_out_1")]; tensor var_1365_axes_0 = const()[name = string("op_1365_axes_0"), val = tensor([2])]; tensor var_1365 = squeeze(axes = var_1365_axes_0, x = mlp_out_1)[name = string("op_1365")]; tensor var_1369 = const()[name = string("op_1369"), val = tensor([0, 2, 1])]; int32 var_1376 = const()[name = string("op_1376"), val = int32(-1)]; fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_15 = transpose(perm = var_1369, x = var_1365)[name = string("transpose_155")]; tensor var_1382_cast_fp16 = mul(x = x_15, y = const_12_promoted_to_fp16)[name = string("op_1382_cast_fp16")]; bool input_19_interleave_0 = const()[name = string("input_19_interleave_0"), val = bool(false)]; tensor input_19_cast_fp16 = concat(axis = var_1376, interleave = input_19_interleave_0, values = (x_15, var_1382_cast_fp16))[name = string("input_19_cast_fp16")]; tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; fp16 var_1374_to_fp16 = const()[name = string("op_1374_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_1374_to_fp16, x = input_19_cast_fp16)[name = string("normed_25_cast_fp16")]; tensor var_1387_split_sizes_0 = const()[name = string("op_1387_split_sizes_0"), val = tensor([640, 640])]; int32 var_1387_axis_0 = const()[name = string("op_1387_axis_0"), val = int32(-1)]; tensor var_1387_cast_fp16_0, tensor var_1387_cast_fp16_1 = split(axis = var_1387_axis_0, split_sizes = var_1387_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_1387_cast_fp16")]; tensor var_1391_to_fp16 = const()[name = string("op_1391_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522203392)))]; tensor out_11_cast_fp16 = mul(x = var_1387_cast_fp16_0, y = var_1391_to_fp16)[name = string("out_11_cast_fp16")]; tensor x_17_cast_fp16 = add(x = x_11_cast_fp16, y = out_11_cast_fp16)[name = string("x_17_cast_fp16")]; int32 var_1405 = const()[name = string("op_1405"), val = int32(-1)]; fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1411_cast_fp16 = mul(x = x_17_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1411_cast_fp16")]; bool input_21_interleave_0 = const()[name = string("input_21_interleave_0"), val = bool(false)]; tensor input_21_cast_fp16 = concat(axis = var_1405, interleave = input_21_interleave_0, values = (x_17_cast_fp16, var_1411_cast_fp16))[name = string("input_21_cast_fp16")]; tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; fp16 var_1403_to_fp16 = const()[name = string("op_1403_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_1403_to_fp16, x = input_21_cast_fp16)[name = string("normed_29_cast_fp16")]; tensor var_1416_split_sizes_0 = const()[name = string("op_1416_split_sizes_0"), val = tensor([640, 640])]; int32 var_1416_axis_0 = const()[name = string("op_1416_axis_0"), val = int32(-1)]; tensor var_1416_cast_fp16_0, tensor var_1416_cast_fp16_1 = split(axis = var_1416_axis_0, split_sizes = var_1416_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_1416_cast_fp16")]; tensor var_1420_to_fp16 = const()[name = string("op_1420_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522204736)))]; tensor out_13_cast_fp16 = mul(x = var_1416_cast_fp16_0, y = var_1420_to_fp16)[name = string("out_13_cast_fp16")]; tensor var_1434 = const()[name = string("op_1434"), val = tensor([0, 2, 1])]; tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; tensor var_1435 = transpose(perm = var_1434, x = out_13_cast_fp16)[name = string("transpose_154")]; tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_1435)[name = string("input_23")]; string var_1448_pad_type_0 = const()[name = string("op_1448_pad_type_0"), val = string("valid")]; tensor var_1448_strides_0 = const()[name = string("op_1448_strides_0"), val = tensor([1, 1])]; tensor var_1448_pad_0 = const()[name = string("op_1448_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1448_dilations_0 = const()[name = string("op_1448_dilations_0"), val = tensor([1, 1])]; int32 var_1448_groups_0 = const()[name = string("op_1448_groups_0"), val = int32(1)]; tensor var_1448 = conv(dilations = var_1448_dilations_0, groups = var_1448_groups_0, pad = var_1448_pad_0, pad_type = var_1448_pad_type_0, strides = var_1448_strides_0, weight = layers_1_self_attn_q_proj_weight, x = input_23)[name = string("op_1448")]; tensor var_1453 = const()[name = string("op_1453"), val = tensor([1, 4, 256, 1])]; tensor var_1454 = reshape(shape = var_1453, x = var_1448)[name = string("op_1454")]; tensor var_1459 = const()[name = string("op_1459"), val = tensor([0, 1, 3, 2])]; tensor var_1464 = const()[name = string("op_1464"), val = tensor([1, 4, 256])]; tensor q_7 = transpose(perm = var_1459, x = var_1454)[name = string("transpose_153")]; tensor x_21 = reshape(shape = var_1464, x = q_7)[name = string("x_21")]; int32 var_1471 = const()[name = string("op_1471"), val = int32(-1)]; fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1477_cast_fp16 = mul(x = x_21, y = const_16_promoted_to_fp16)[name = string("op_1477_cast_fp16")]; bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; tensor input_25_cast_fp16 = concat(axis = var_1471, interleave = input_25_interleave_0, values = (x_21, var_1477_cast_fp16))[name = string("input_25_cast_fp16")]; tensor normed_35_axes_0 = const()[name = string("normed_35_axes_0"), val = tensor([-1])]; fp16 var_1469_to_fp16 = const()[name = string("op_1469_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_35_cast_fp16 = layer_norm(axes = normed_35_axes_0, epsilon = var_1469_to_fp16, x = input_25_cast_fp16)[name = string("normed_35_cast_fp16")]; tensor var_1482_split_sizes_0 = const()[name = string("op_1482_split_sizes_0"), val = tensor([256, 256])]; int32 var_1482_axis_0 = const()[name = string("op_1482_axis_0"), val = int32(-1)]; tensor var_1482_cast_fp16_0, tensor var_1482_cast_fp16_1 = split(axis = var_1482_axis_0, split_sizes = var_1482_split_sizes_0, x = normed_35_cast_fp16)[name = string("op_1482_cast_fp16")]; tensor var_1486_to_fp16 = const()[name = string("op_1486_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522206080)))]; tensor out_15_cast_fp16 = mul(x = var_1482_cast_fp16_0, y = var_1486_to_fp16)[name = string("out_15_cast_fp16")]; tensor var_1493 = const()[name = string("op_1493"), val = tensor([1, 4, 1, 256])]; tensor q_9 = reshape(shape = var_1493, x = out_15_cast_fp16)[name = string("q_9")]; string var_1505_pad_type_0 = const()[name = string("op_1505_pad_type_0"), val = string("valid")]; tensor var_1505_strides_0 = const()[name = string("op_1505_strides_0"), val = tensor([1, 1])]; tensor var_1505_pad_0 = const()[name = string("op_1505_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1505_dilations_0 = const()[name = string("op_1505_dilations_0"), val = tensor([1, 1])]; int32 var_1505_groups_0 = const()[name = string("op_1505_groups_0"), val = int32(1)]; tensor var_1505 = conv(dilations = var_1505_dilations_0, groups = var_1505_groups_0, pad = var_1505_pad_0, pad_type = var_1505_pad_type_0, strides = var_1505_strides_0, weight = layers_1_self_attn_k_proj_weight, x = input_23)[name = string("op_1505")]; tensor var_1510 = const()[name = string("op_1510"), val = tensor([1, 1, 256, 1])]; tensor var_1511 = reshape(shape = var_1510, x = var_1505)[name = string("op_1511")]; tensor var_1516 = const()[name = string("op_1516"), val = tensor([0, 1, 3, 2])]; tensor var_1521 = const()[name = string("op_1521"), val = tensor([1, 1, 256])]; tensor k_7 = transpose(perm = var_1516, x = var_1511)[name = string("transpose_152")]; tensor x_23 = reshape(shape = var_1521, x = k_7)[name = string("x_23")]; int32 var_1528 = const()[name = string("op_1528"), val = int32(-1)]; fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1534_cast_fp16 = mul(x = x_23, y = const_18_promoted_to_fp16)[name = string("op_1534_cast_fp16")]; bool input_27_interleave_0 = const()[name = string("input_27_interleave_0"), val = bool(false)]; tensor input_27_cast_fp16 = concat(axis = var_1528, interleave = input_27_interleave_0, values = (x_23, var_1534_cast_fp16))[name = string("input_27_cast_fp16")]; tensor normed_39_axes_0 = const()[name = string("normed_39_axes_0"), val = tensor([-1])]; fp16 var_1526_to_fp16 = const()[name = string("op_1526_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_39_cast_fp16 = layer_norm(axes = normed_39_axes_0, epsilon = var_1526_to_fp16, x = input_27_cast_fp16)[name = string("normed_39_cast_fp16")]; tensor var_1539_split_sizes_0 = const()[name = string("op_1539_split_sizes_0"), val = tensor([256, 256])]; int32 var_1539_axis_0 = const()[name = string("op_1539_axis_0"), val = int32(-1)]; tensor var_1539_cast_fp16_0, tensor var_1539_cast_fp16_1 = split(axis = var_1539_axis_0, split_sizes = var_1539_split_sizes_0, x = normed_39_cast_fp16)[name = string("op_1539_cast_fp16")]; tensor var_1543_to_fp16 = const()[name = string("op_1543_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522206656)))]; tensor out_17_cast_fp16 = mul(x = var_1539_cast_fp16_0, y = var_1543_to_fp16)[name = string("out_17_cast_fp16")]; tensor var_1550 = const()[name = string("op_1550"), val = tensor([1, 1, 1, 256])]; tensor k_9 = reshape(shape = var_1550, x = out_17_cast_fp16)[name = string("k_9")]; string var_1562_pad_type_0 = const()[name = string("op_1562_pad_type_0"), val = string("valid")]; tensor var_1562_strides_0 = const()[name = string("op_1562_strides_0"), val = tensor([1, 1])]; tensor var_1562_pad_0 = const()[name = string("op_1562_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1562_dilations_0 = const()[name = string("op_1562_dilations_0"), val = tensor([1, 1])]; int32 var_1562_groups_0 = const()[name = string("op_1562_groups_0"), val = int32(1)]; tensor var_1562 = conv(dilations = var_1562_dilations_0, groups = var_1562_groups_0, pad = var_1562_pad_0, pad_type = var_1562_pad_type_0, strides = var_1562_strides_0, weight = layers_1_self_attn_v_proj_weight, x = input_23)[name = string("op_1562")]; tensor var_1567 = const()[name = string("op_1567"), val = tensor([1, 1, 256, 1])]; tensor var_1568 = reshape(shape = var_1567, x = var_1562)[name = string("op_1568")]; tensor var_1573 = const()[name = string("op_1573"), val = tensor([0, 1, 3, 2])]; tensor var_1575 = mul(x = q_9, y = cos_1)[name = string("op_1575")]; tensor var_1576_split_sizes_0 = const()[name = string("op_1576_split_sizes_0"), val = tensor([128, 128])]; int32 var_1576_axis_0 = const()[name = string("op_1576_axis_0"), val = int32(-1)]; tensor var_1576_0, tensor var_1576_1 = split(axis = var_1576_axis_0, split_sizes = var_1576_split_sizes_0, x = q_9)[name = string("op_1576")]; fp16 const_20_promoted = const()[name = string("const_20_promoted"), val = fp16(-0x1p+0)]; tensor var_1578 = mul(x = var_1576_1, y = const_20_promoted)[name = string("op_1578")]; int32 var_1580 = const()[name = string("op_1580"), val = int32(-1)]; bool var_1581_interleave_0 = const()[name = string("op_1581_interleave_0"), val = bool(false)]; tensor var_1581 = concat(axis = var_1580, interleave = var_1581_interleave_0, values = (var_1578, var_1576_0))[name = string("op_1581")]; tensor var_1582 = mul(x = var_1581, y = sin_1)[name = string("op_1582")]; tensor q_11 = add(x = var_1575, y = var_1582)[name = string("q_11")]; tensor var_1585 = mul(x = k_9, y = cos_1)[name = string("op_1585")]; tensor var_1586_split_sizes_0 = const()[name = string("op_1586_split_sizes_0"), val = tensor([128, 128])]; int32 var_1586_axis_0 = const()[name = string("op_1586_axis_0"), val = int32(-1)]; tensor var_1586_0, tensor var_1586_1 = split(axis = var_1586_axis_0, split_sizes = var_1586_split_sizes_0, x = k_9)[name = string("op_1586")]; fp16 const_21_promoted = const()[name = string("const_21_promoted"), val = fp16(-0x1p+0)]; tensor var_1588 = mul(x = var_1586_1, y = const_21_promoted)[name = string("op_1588")]; int32 var_1590 = const()[name = string("op_1590"), val = int32(-1)]; bool var_1591_interleave_0 = const()[name = string("op_1591_interleave_0"), val = bool(false)]; tensor var_1591 = concat(axis = var_1590, interleave = var_1591_interleave_0, values = (var_1588, var_1586_0))[name = string("op_1591")]; tensor var_1592 = mul(x = var_1591, y = sin_1)[name = string("op_1592")]; tensor k_11 = add(x = var_1585, y = var_1592)[name = string("k_11")]; tensor var_1597_begin_0 = const()[name = string("op_1597_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_1597_end_0 = const()[name = string("op_1597_end_0"), val = tensor([2, 1, 2048, 256])]; tensor var_1597_end_mask_0 = const()[name = string("op_1597_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1597_squeeze_mask_0 = const()[name = string("op_1597_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_1597_cast_fp16 = slice_by_index(begin = var_1597_begin_0, end = var_1597_end_0, end_mask = var_1597_end_mask_0, squeeze_mask = var_1597_squeeze_mask_0, x = coreml_update_state_37)[name = string("op_1597_cast_fp16")]; tensor K_cache_3_axes_0 = const()[name = string("K_cache_3_axes_0"), val = tensor([0])]; tensor K_cache_3_cast_fp16 = expand_dims(axes = K_cache_3_axes_0, x = var_1597_cast_fp16)[name = string("K_cache_3_cast_fp16")]; tensor var_1602_begin_0 = const()[name = string("op_1602_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_1602_end_0 = const()[name = string("op_1602_end_0"), val = tensor([20, 1, 2048, 256])]; tensor var_1602_end_mask_0 = const()[name = string("op_1602_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1602_squeeze_mask_0 = const()[name = string("op_1602_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_1602_cast_fp16 = slice_by_index(begin = var_1602_begin_0, end = var_1602_end_0, end_mask = var_1602_end_mask_0, squeeze_mask = var_1602_squeeze_mask_0, x = coreml_update_state_37)[name = string("op_1602_cast_fp16")]; tensor V_cache_3_axes_0 = const()[name = string("V_cache_3_axes_0"), val = tensor([0])]; tensor V_cache_3_cast_fp16 = expand_dims(axes = V_cache_3_axes_0, x = var_1602_cast_fp16)[name = string("V_cache_3_cast_fp16")]; tensor k_broadcast_3_reps_0 = const()[name = string("k_broadcast_3_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_3 = tile(reps = k_broadcast_3_reps_0, x = k_11)[name = string("k_broadcast_3")]; tensor v_broadcast_3_reps_0 = const()[name = string("v_broadcast_3_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_3 = transpose(perm = var_1573, x = var_1568)[name = string("transpose_151")]; tensor v_broadcast_3 = tile(reps = v_broadcast_3_reps_0, x = v_3)[name = string("v_broadcast_3")]; tensor var_1610_cast_fp16 = mul(x = K_cache_3_cast_fp16, y = var_1120_cast_fp16)[name = string("op_1610_cast_fp16")]; tensor var_1611_cast_fp16 = mul(x = k_broadcast_3, y = update_mask)[name = string("op_1611_cast_fp16")]; tensor K_new_3_cast_fp16 = add(x = var_1610_cast_fp16, y = var_1611_cast_fp16)[name = string("K_new_3_cast_fp16")]; tensor var_1617_cast_fp16 = mul(x = V_cache_3_cast_fp16, y = var_1120_cast_fp16)[name = string("op_1617_cast_fp16")]; tensor var_1618_cast_fp16 = mul(x = v_broadcast_3, y = update_mask)[name = string("op_1618_cast_fp16")]; tensor V_new_3_cast_fp16 = add(x = var_1617_cast_fp16, y = var_1618_cast_fp16)[name = string("V_new_3_cast_fp16")]; tensor var_1622_axes_0 = const()[name = string("op_1622_axes_0"), val = tensor([0])]; tensor var_1622_cast_fp16 = squeeze(axes = var_1622_axes_0, x = K_new_3_cast_fp16)[name = string("op_1622_cast_fp16")]; tensor concat_4 = const()[name = string("concat_4"), val = tensor([1, 0, 0, 0])]; tensor concat_5 = const()[name = string("concat_5"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_4, begin_mask = kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_5, end_mask = kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_3_stride_0, update = var_1622_cast_fp16, x = coreml_update_state_37)[name = string("kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_3_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_38_write_state")]; tensor coreml_update_state_38 = read_state(input = kv_cache_0)[name = string("coreml_update_state_38")]; tensor var_1629_axes_0 = const()[name = string("op_1629_axes_0"), val = tensor([0])]; tensor var_1629_cast_fp16 = squeeze(axes = var_1629_axes_0, x = V_new_3_cast_fp16)[name = string("op_1629_cast_fp16")]; tensor concat_6 = const()[name = string("concat_6"), val = tensor([19, 0, 0, 0])]; tensor concat_7 = const()[name = string("concat_7"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_6, begin_mask = kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_7, end_mask = kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_4_stride_0, update = var_1629_cast_fp16, x = coreml_update_state_38)[name = string("kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_4_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_39_write_state")]; tensor coreml_update_state_39 = read_state(input = kv_cache_0)[name = string("coreml_update_state_39")]; tensor hidden_states_11_axes_0 = const()[name = string("hidden_states_11_axes_0"), val = tensor([2])]; tensor hidden_states_11_cast_fp16 = expand_dims(axes = hidden_states_11_axes_0, x = K_new_3_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor var_1642 = const()[name = string("op_1642"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_13_cast_fp16 = tile(reps = var_1642, x = hidden_states_11_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; tensor var_1648 = const()[name = string("op_1648"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_3_cast_fp16 = reshape(shape = var_1648, x = hidden_states_13_cast_fp16)[name = string("K_expanded_3_cast_fp16")]; tensor hidden_states_15_axes_0 = const()[name = string("hidden_states_15_axes_0"), val = tensor([2])]; tensor hidden_states_15_cast_fp16 = expand_dims(axes = hidden_states_15_axes_0, x = V_new_3_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; tensor var_1657 = const()[name = string("op_1657"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_17_cast_fp16 = tile(reps = var_1657, x = hidden_states_15_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor var_1663 = const()[name = string("op_1663"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_3_cast_fp16 = reshape(shape = var_1663, x = hidden_states_17_cast_fp16)[name = string("V_expanded_3_cast_fp16")]; bool var_1678_transpose_x_1 = const()[name = string("op_1678_transpose_x_1"), val = bool(false)]; bool var_1678_transpose_y_1 = const()[name = string("op_1678_transpose_y_1"), val = bool(true)]; tensor var_1678_cast_fp16 = matmul(transpose_x = var_1678_transpose_x_1, transpose_y = var_1678_transpose_y_1, x = q_11, y = K_expanded_3_cast_fp16)[name = string("op_1678_cast_fp16")]; fp16 var_1679_to_fp16 = const()[name = string("op_1679_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_7_cast_fp16 = mul(x = var_1678_cast_fp16, y = var_1679_to_fp16)[name = string("attn_weights_7_cast_fp16")]; tensor attn_weights_9_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = causal_mask)[name = string("attn_weights_9_cast_fp16")]; int32 var_1688 = const()[name = string("op_1688"), val = int32(-1)]; tensor var_1690_cast_fp16 = softmax(axis = var_1688, x = attn_weights_9_cast_fp16)[name = string("op_1690_cast_fp16")]; bool var_1706_transpose_x_0 = const()[name = string("op_1706_transpose_x_0"), val = bool(false)]; bool var_1706_transpose_y_0 = const()[name = string("op_1706_transpose_y_0"), val = bool(false)]; tensor var_1706_cast_fp16 = matmul(transpose_x = var_1706_transpose_x_0, transpose_y = var_1706_transpose_y_0, x = var_1690_cast_fp16, y = V_expanded_3_cast_fp16)[name = string("op_1706_cast_fp16")]; tensor var_1716 = const()[name = string("op_1716"), val = tensor([0, 2, 1, 3])]; tensor var_1723 = const()[name = string("op_1723"), val = tensor([1, 1, -1])]; tensor var_1717 = transpose(perm = var_1716, x = var_1706_cast_fp16)[name = string("transpose_150")]; tensor attn_output_9 = reshape(shape = var_1723, x = var_1717)[name = string("attn_output_9")]; tensor var_1728 = const()[name = string("op_1728"), val = tensor([0, 2, 1])]; tensor squeeze_1 = const()[name = string("squeeze_1"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522207232)))]; string var_1744_pad_type_0 = const()[name = string("op_1744_pad_type_0"), val = string("valid")]; int32 var_1744_groups_0 = const()[name = string("op_1744_groups_0"), val = int32(1)]; tensor var_1744_strides_0 = const()[name = string("op_1744_strides_0"), val = tensor([1])]; tensor var_1744_pad_0 = const()[name = string("op_1744_pad_0"), val = tensor([0, 0])]; tensor var_1744_dilations_0 = const()[name = string("op_1744_dilations_0"), val = tensor([1])]; tensor var_1729 = transpose(perm = var_1728, x = attn_output_9)[name = string("transpose_149")]; tensor var_1744 = conv(dilations = var_1744_dilations_0, groups = var_1744_groups_0, pad = var_1744_pad_0, pad_type = var_1744_pad_type_0, strides = var_1744_strides_0, weight = squeeze_1, x = var_1729)[name = string("op_1744")]; tensor var_1748 = const()[name = string("op_1748"), val = tensor([0, 2, 1])]; int32 var_1755 = const()[name = string("op_1755"), val = int32(-1)]; fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_25 = transpose(perm = var_1748, x = var_1744)[name = string("transpose_148")]; tensor var_1761_cast_fp16 = mul(x = x_25, y = const_22_promoted_to_fp16)[name = string("op_1761_cast_fp16")]; bool input_31_interleave_0 = const()[name = string("input_31_interleave_0"), val = bool(false)]; tensor input_31_cast_fp16 = concat(axis = var_1755, interleave = input_31_interleave_0, values = (x_25, var_1761_cast_fp16))[name = string("input_31_cast_fp16")]; tensor normed_43_axes_0 = const()[name = string("normed_43_axes_0"), val = tensor([-1])]; fp16 var_1753_to_fp16 = const()[name = string("op_1753_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_43_cast_fp16 = layer_norm(axes = normed_43_axes_0, epsilon = var_1753_to_fp16, x = input_31_cast_fp16)[name = string("normed_43_cast_fp16")]; tensor var_1766_split_sizes_0 = const()[name = string("op_1766_split_sizes_0"), val = tensor([640, 640])]; int32 var_1766_axis_0 = const()[name = string("op_1766_axis_0"), val = int32(-1)]; tensor var_1766_cast_fp16_0, tensor var_1766_cast_fp16_1 = split(axis = var_1766_axis_0, split_sizes = var_1766_split_sizes_0, x = normed_43_cast_fp16)[name = string("op_1766_cast_fp16")]; tensor var_1770_to_fp16 = const()[name = string("op_1770_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523518016)))]; tensor out_19_cast_fp16 = mul(x = var_1766_cast_fp16_0, y = var_1770_to_fp16)[name = string("out_19_cast_fp16")]; tensor x_27_cast_fp16 = add(x = x_17_cast_fp16, y = out_19_cast_fp16)[name = string("x_27_cast_fp16")]; int32 var_1784 = const()[name = string("op_1784"), val = int32(-1)]; fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1790_cast_fp16 = mul(x = x_27_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_1790_cast_fp16")]; bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; tensor input_33_cast_fp16 = concat(axis = var_1784, interleave = input_33_interleave_0, values = (x_27_cast_fp16, var_1790_cast_fp16))[name = string("input_33_cast_fp16")]; tensor normed_47_axes_0 = const()[name = string("normed_47_axes_0"), val = tensor([-1])]; fp16 var_1782_to_fp16 = const()[name = string("op_1782_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_47_cast_fp16 = layer_norm(axes = normed_47_axes_0, epsilon = var_1782_to_fp16, x = input_33_cast_fp16)[name = string("normed_47_cast_fp16")]; tensor var_1795_split_sizes_0 = const()[name = string("op_1795_split_sizes_0"), val = tensor([640, 640])]; int32 var_1795_axis_0 = const()[name = string("op_1795_axis_0"), val = int32(-1)]; tensor var_1795_cast_fp16_0, tensor var_1795_cast_fp16_1 = split(axis = var_1795_axis_0, split_sizes = var_1795_split_sizes_0, x = normed_47_cast_fp16)[name = string("op_1795_cast_fp16")]; tensor var_1799_to_fp16 = const()[name = string("op_1799_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523519360)))]; tensor out_21_cast_fp16 = mul(x = var_1795_cast_fp16_0, y = var_1799_to_fp16)[name = string("out_21_cast_fp16")]; tensor var_1813 = const()[name = string("op_1813"), val = tensor([0, 2, 1])]; tensor input_35_axes_0 = const()[name = string("input_35_axes_0"), val = tensor([2])]; tensor var_1814 = transpose(perm = var_1813, x = out_21_cast_fp16)[name = string("transpose_147")]; tensor input_35 = expand_dims(axes = input_35_axes_0, x = var_1814)[name = string("input_35")]; string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")]; tensor gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor([1, 1])]; tensor gate_5_pad_0 = const()[name = string("gate_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_5_dilations_0 = const()[name = string("gate_5_dilations_0"), val = tensor([1, 1])]; int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)]; tensor gate_5 = conv(dilations = gate_5_dilations_0, groups = gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = layers_1_mlp_gate_proj_weight, x = input_35)[name = string("gate_5")]; string up_3_pad_type_0 = const()[name = string("up_3_pad_type_0"), val = string("valid")]; tensor up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor([1, 1])]; tensor up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor([1, 1])]; int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)]; tensor up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, weight = layers_1_mlp_up_proj_weight, x = input_35)[name = string("up_3")]; string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")]; tensor input_37 = mul(x = gate_7, y = up_3)[name = string("input_37")]; string mlp_out_5_pad_type_0 = const()[name = string("mlp_out_5_pad_type_0"), val = string("valid")]; tensor mlp_out_5_strides_0 = const()[name = string("mlp_out_5_strides_0"), val = tensor([1, 1])]; tensor mlp_out_5_pad_0 = const()[name = string("mlp_out_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_5_dilations_0 = const()[name = string("mlp_out_5_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_5_groups_0 = const()[name = string("mlp_out_5_groups_0"), val = int32(1)]; tensor mlp_out_5 = conv(dilations = mlp_out_5_dilations_0, groups = mlp_out_5_groups_0, pad = mlp_out_5_pad_0, pad_type = mlp_out_5_pad_type_0, strides = mlp_out_5_strides_0, weight = layers_1_mlp_down_proj_weight, x = input_37)[name = string("mlp_out_5")]; tensor var_1854_axes_0 = const()[name = string("op_1854_axes_0"), val = tensor([2])]; tensor var_1854 = squeeze(axes = var_1854_axes_0, x = mlp_out_5)[name = string("op_1854")]; tensor var_1858 = const()[name = string("op_1858"), val = tensor([0, 2, 1])]; int32 var_1865 = const()[name = string("op_1865"), val = int32(-1)]; fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_31 = transpose(perm = var_1858, x = var_1854)[name = string("transpose_146")]; tensor var_1871_cast_fp16 = mul(x = x_31, y = const_26_promoted_to_fp16)[name = string("op_1871_cast_fp16")]; bool input_39_interleave_0 = const()[name = string("input_39_interleave_0"), val = bool(false)]; tensor input_39_cast_fp16 = concat(axis = var_1865, interleave = input_39_interleave_0, values = (x_31, var_1871_cast_fp16))[name = string("input_39_cast_fp16")]; tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; fp16 var_1863_to_fp16 = const()[name = string("op_1863_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_1863_to_fp16, x = input_39_cast_fp16)[name = string("normed_53_cast_fp16")]; tensor var_1876_split_sizes_0 = const()[name = string("op_1876_split_sizes_0"), val = tensor([640, 640])]; int32 var_1876_axis_0 = const()[name = string("op_1876_axis_0"), val = int32(-1)]; tensor var_1876_cast_fp16_0, tensor var_1876_cast_fp16_1 = split(axis = var_1876_axis_0, split_sizes = var_1876_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_1876_cast_fp16")]; tensor var_1880_to_fp16 = const()[name = string("op_1880_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523520704)))]; tensor out_23_cast_fp16 = mul(x = var_1876_cast_fp16_0, y = var_1880_to_fp16)[name = string("out_23_cast_fp16")]; tensor x_33_cast_fp16 = add(x = x_27_cast_fp16, y = out_23_cast_fp16)[name = string("x_33_cast_fp16")]; int32 var_1894 = const()[name = string("op_1894"), val = int32(-1)]; fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1900_cast_fp16 = mul(x = x_33_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_1900_cast_fp16")]; bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; tensor input_41_cast_fp16 = concat(axis = var_1894, interleave = input_41_interleave_0, values = (x_33_cast_fp16, var_1900_cast_fp16))[name = string("input_41_cast_fp16")]; tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; fp16 var_1892_to_fp16 = const()[name = string("op_1892_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_1892_to_fp16, x = input_41_cast_fp16)[name = string("normed_57_cast_fp16")]; tensor var_1905_split_sizes_0 = const()[name = string("op_1905_split_sizes_0"), val = tensor([640, 640])]; int32 var_1905_axis_0 = const()[name = string("op_1905_axis_0"), val = int32(-1)]; tensor var_1905_cast_fp16_0, tensor var_1905_cast_fp16_1 = split(axis = var_1905_axis_0, split_sizes = var_1905_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_1905_cast_fp16")]; tensor var_1909_to_fp16 = const()[name = string("op_1909_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523522048)))]; tensor out_25_cast_fp16 = mul(x = var_1905_cast_fp16_0, y = var_1909_to_fp16)[name = string("out_25_cast_fp16")]; tensor var_1923 = const()[name = string("op_1923"), val = tensor([0, 2, 1])]; tensor input_43_axes_0 = const()[name = string("input_43_axes_0"), val = tensor([2])]; tensor var_1924 = transpose(perm = var_1923, x = out_25_cast_fp16)[name = string("transpose_145")]; tensor input_43 = expand_dims(axes = input_43_axes_0, x = var_1924)[name = string("input_43")]; string var_1937_pad_type_0 = const()[name = string("op_1937_pad_type_0"), val = string("valid")]; tensor var_1937_strides_0 = const()[name = string("op_1937_strides_0"), val = tensor([1, 1])]; tensor var_1937_pad_0 = const()[name = string("op_1937_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1937_dilations_0 = const()[name = string("op_1937_dilations_0"), val = tensor([1, 1])]; int32 var_1937_groups_0 = const()[name = string("op_1937_groups_0"), val = int32(1)]; tensor var_1937 = conv(dilations = var_1937_dilations_0, groups = var_1937_groups_0, pad = var_1937_pad_0, pad_type = var_1937_pad_type_0, strides = var_1937_strides_0, weight = layers_2_self_attn_q_proj_weight, x = input_43)[name = string("op_1937")]; tensor var_1942 = const()[name = string("op_1942"), val = tensor([1, 4, 256, 1])]; tensor var_1943 = reshape(shape = var_1942, x = var_1937)[name = string("op_1943")]; tensor var_1948 = const()[name = string("op_1948"), val = tensor([0, 1, 3, 2])]; tensor var_1953 = const()[name = string("op_1953"), val = tensor([1, 4, 256])]; tensor q_13 = transpose(perm = var_1948, x = var_1943)[name = string("transpose_144")]; tensor x_37 = reshape(shape = var_1953, x = q_13)[name = string("x_37")]; int32 var_1960 = const()[name = string("op_1960"), val = int32(-1)]; fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1966_cast_fp16 = mul(x = x_37, y = const_30_promoted_to_fp16)[name = string("op_1966_cast_fp16")]; bool input_45_interleave_0 = const()[name = string("input_45_interleave_0"), val = bool(false)]; tensor input_45_cast_fp16 = concat(axis = var_1960, interleave = input_45_interleave_0, values = (x_37, var_1966_cast_fp16))[name = string("input_45_cast_fp16")]; tensor normed_63_axes_0 = const()[name = string("normed_63_axes_0"), val = tensor([-1])]; fp16 var_1958_to_fp16 = const()[name = string("op_1958_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_63_cast_fp16 = layer_norm(axes = normed_63_axes_0, epsilon = var_1958_to_fp16, x = input_45_cast_fp16)[name = string("normed_63_cast_fp16")]; tensor var_1971_split_sizes_0 = const()[name = string("op_1971_split_sizes_0"), val = tensor([256, 256])]; int32 var_1971_axis_0 = const()[name = string("op_1971_axis_0"), val = int32(-1)]; tensor var_1971_cast_fp16_0, tensor var_1971_cast_fp16_1 = split(axis = var_1971_axis_0, split_sizes = var_1971_split_sizes_0, x = normed_63_cast_fp16)[name = string("op_1971_cast_fp16")]; tensor var_1975_to_fp16 = const()[name = string("op_1975_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523523392)))]; tensor out_27_cast_fp16 = mul(x = var_1971_cast_fp16_0, y = var_1975_to_fp16)[name = string("out_27_cast_fp16")]; tensor var_1982 = const()[name = string("op_1982"), val = tensor([1, 4, 1, 256])]; tensor q_15 = reshape(shape = var_1982, x = out_27_cast_fp16)[name = string("q_15")]; string var_1994_pad_type_0 = const()[name = string("op_1994_pad_type_0"), val = string("valid")]; tensor var_1994_strides_0 = const()[name = string("op_1994_strides_0"), val = tensor([1, 1])]; tensor var_1994_pad_0 = const()[name = string("op_1994_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1994_dilations_0 = const()[name = string("op_1994_dilations_0"), val = tensor([1, 1])]; int32 var_1994_groups_0 = const()[name = string("op_1994_groups_0"), val = int32(1)]; tensor var_1994 = conv(dilations = var_1994_dilations_0, groups = var_1994_groups_0, pad = var_1994_pad_0, pad_type = var_1994_pad_type_0, strides = var_1994_strides_0, weight = layers_2_self_attn_k_proj_weight, x = input_43)[name = string("op_1994")]; tensor var_1999 = const()[name = string("op_1999"), val = tensor([1, 1, 256, 1])]; tensor var_2000 = reshape(shape = var_1999, x = var_1994)[name = string("op_2000")]; tensor var_2005 = const()[name = string("op_2005"), val = tensor([0, 1, 3, 2])]; tensor var_2010 = const()[name = string("op_2010"), val = tensor([1, 1, 256])]; tensor k_13 = transpose(perm = var_2005, x = var_2000)[name = string("transpose_143")]; tensor x_39 = reshape(shape = var_2010, x = k_13)[name = string("x_39")]; int32 var_2017 = const()[name = string("op_2017"), val = int32(-1)]; fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2023_cast_fp16 = mul(x = x_39, y = const_32_promoted_to_fp16)[name = string("op_2023_cast_fp16")]; bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; tensor input_47_cast_fp16 = concat(axis = var_2017, interleave = input_47_interleave_0, values = (x_39, var_2023_cast_fp16))[name = string("input_47_cast_fp16")]; tensor normed_67_axes_0 = const()[name = string("normed_67_axes_0"), val = tensor([-1])]; fp16 var_2015_to_fp16 = const()[name = string("op_2015_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_67_cast_fp16 = layer_norm(axes = normed_67_axes_0, epsilon = var_2015_to_fp16, x = input_47_cast_fp16)[name = string("normed_67_cast_fp16")]; tensor var_2028_split_sizes_0 = const()[name = string("op_2028_split_sizes_0"), val = tensor([256, 256])]; int32 var_2028_axis_0 = const()[name = string("op_2028_axis_0"), val = int32(-1)]; tensor var_2028_cast_fp16_0, tensor var_2028_cast_fp16_1 = split(axis = var_2028_axis_0, split_sizes = var_2028_split_sizes_0, x = normed_67_cast_fp16)[name = string("op_2028_cast_fp16")]; tensor var_2032_to_fp16 = const()[name = string("op_2032_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523523968)))]; tensor out_29_cast_fp16 = mul(x = var_2028_cast_fp16_0, y = var_2032_to_fp16)[name = string("out_29_cast_fp16")]; tensor var_2039 = const()[name = string("op_2039"), val = tensor([1, 1, 1, 256])]; tensor k_15 = reshape(shape = var_2039, x = out_29_cast_fp16)[name = string("k_15")]; string var_2051_pad_type_0 = const()[name = string("op_2051_pad_type_0"), val = string("valid")]; tensor var_2051_strides_0 = const()[name = string("op_2051_strides_0"), val = tensor([1, 1])]; tensor var_2051_pad_0 = const()[name = string("op_2051_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2051_dilations_0 = const()[name = string("op_2051_dilations_0"), val = tensor([1, 1])]; int32 var_2051_groups_0 = const()[name = string("op_2051_groups_0"), val = int32(1)]; tensor var_2051 = conv(dilations = var_2051_dilations_0, groups = var_2051_groups_0, pad = var_2051_pad_0, pad_type = var_2051_pad_type_0, strides = var_2051_strides_0, weight = layers_2_self_attn_v_proj_weight, x = input_43)[name = string("op_2051")]; tensor var_2056 = const()[name = string("op_2056"), val = tensor([1, 1, 256, 1])]; tensor var_2057 = reshape(shape = var_2056, x = var_2051)[name = string("op_2057")]; tensor var_2062 = const()[name = string("op_2062"), val = tensor([0, 1, 3, 2])]; tensor var_2064 = mul(x = q_15, y = cos_1)[name = string("op_2064")]; tensor var_2065_split_sizes_0 = const()[name = string("op_2065_split_sizes_0"), val = tensor([128, 128])]; int32 var_2065_axis_0 = const()[name = string("op_2065_axis_0"), val = int32(-1)]; tensor var_2065_0, tensor var_2065_1 = split(axis = var_2065_axis_0, split_sizes = var_2065_split_sizes_0, x = q_15)[name = string("op_2065")]; fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)]; tensor var_2067 = mul(x = var_2065_1, y = const_34_promoted)[name = string("op_2067")]; int32 var_2069 = const()[name = string("op_2069"), val = int32(-1)]; bool var_2070_interleave_0 = const()[name = string("op_2070_interleave_0"), val = bool(false)]; tensor var_2070 = concat(axis = var_2069, interleave = var_2070_interleave_0, values = (var_2067, var_2065_0))[name = string("op_2070")]; tensor var_2071 = mul(x = var_2070, y = sin_1)[name = string("op_2071")]; tensor q_17 = add(x = var_2064, y = var_2071)[name = string("q_17")]; tensor var_2074 = mul(x = k_15, y = cos_1)[name = string("op_2074")]; tensor var_2075_split_sizes_0 = const()[name = string("op_2075_split_sizes_0"), val = tensor([128, 128])]; int32 var_2075_axis_0 = const()[name = string("op_2075_axis_0"), val = int32(-1)]; tensor var_2075_0, tensor var_2075_1 = split(axis = var_2075_axis_0, split_sizes = var_2075_split_sizes_0, x = k_15)[name = string("op_2075")]; fp16 const_35_promoted = const()[name = string("const_35_promoted"), val = fp16(-0x1p+0)]; tensor var_2077 = mul(x = var_2075_1, y = const_35_promoted)[name = string("op_2077")]; int32 var_2079 = const()[name = string("op_2079"), val = int32(-1)]; bool var_2080_interleave_0 = const()[name = string("op_2080_interleave_0"), val = bool(false)]; tensor var_2080 = concat(axis = var_2079, interleave = var_2080_interleave_0, values = (var_2077, var_2075_0))[name = string("op_2080")]; tensor var_2081 = mul(x = var_2080, y = sin_1)[name = string("op_2081")]; tensor k_17 = add(x = var_2074, y = var_2081)[name = string("k_17")]; tensor var_2086_begin_0 = const()[name = string("op_2086_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_2086_end_0 = const()[name = string("op_2086_end_0"), val = tensor([3, 1, 2048, 256])]; tensor var_2086_end_mask_0 = const()[name = string("op_2086_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2086_squeeze_mask_0 = const()[name = string("op_2086_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_2086_cast_fp16 = slice_by_index(begin = var_2086_begin_0, end = var_2086_end_0, end_mask = var_2086_end_mask_0, squeeze_mask = var_2086_squeeze_mask_0, x = coreml_update_state_39)[name = string("op_2086_cast_fp16")]; tensor K_cache_5_axes_0 = const()[name = string("K_cache_5_axes_0"), val = tensor([0])]; tensor K_cache_5_cast_fp16 = expand_dims(axes = K_cache_5_axes_0, x = var_2086_cast_fp16)[name = string("K_cache_5_cast_fp16")]; tensor var_2091_begin_0 = const()[name = string("op_2091_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_2091_end_0 = const()[name = string("op_2091_end_0"), val = tensor([21, 1, 2048, 256])]; tensor var_2091_end_mask_0 = const()[name = string("op_2091_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2091_squeeze_mask_0 = const()[name = string("op_2091_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_2091_cast_fp16 = slice_by_index(begin = var_2091_begin_0, end = var_2091_end_0, end_mask = var_2091_end_mask_0, squeeze_mask = var_2091_squeeze_mask_0, x = coreml_update_state_39)[name = string("op_2091_cast_fp16")]; tensor V_cache_5_axes_0 = const()[name = string("V_cache_5_axes_0"), val = tensor([0])]; tensor V_cache_5_cast_fp16 = expand_dims(axes = V_cache_5_axes_0, x = var_2091_cast_fp16)[name = string("V_cache_5_cast_fp16")]; tensor k_broadcast_5_reps_0 = const()[name = string("k_broadcast_5_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_5 = tile(reps = k_broadcast_5_reps_0, x = k_17)[name = string("k_broadcast_5")]; tensor v_broadcast_5_reps_0 = const()[name = string("v_broadcast_5_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_5 = transpose(perm = var_2062, x = var_2057)[name = string("transpose_142")]; tensor v_broadcast_5 = tile(reps = v_broadcast_5_reps_0, x = v_5)[name = string("v_broadcast_5")]; tensor var_2099_cast_fp16 = mul(x = K_cache_5_cast_fp16, y = var_1120_cast_fp16)[name = string("op_2099_cast_fp16")]; tensor var_2100_cast_fp16 = mul(x = k_broadcast_5, y = update_mask)[name = string("op_2100_cast_fp16")]; tensor K_new_5_cast_fp16 = add(x = var_2099_cast_fp16, y = var_2100_cast_fp16)[name = string("K_new_5_cast_fp16")]; tensor var_2106_cast_fp16 = mul(x = V_cache_5_cast_fp16, y = var_1120_cast_fp16)[name = string("op_2106_cast_fp16")]; tensor var_2107_cast_fp16 = mul(x = v_broadcast_5, y = update_mask)[name = string("op_2107_cast_fp16")]; tensor V_new_5_cast_fp16 = add(x = var_2106_cast_fp16, y = var_2107_cast_fp16)[name = string("V_new_5_cast_fp16")]; tensor var_2111_axes_0 = const()[name = string("op_2111_axes_0"), val = tensor([0])]; tensor var_2111_cast_fp16 = squeeze(axes = var_2111_axes_0, x = K_new_5_cast_fp16)[name = string("op_2111_cast_fp16")]; tensor concat_8 = const()[name = string("concat_8"), val = tensor([2, 0, 0, 0])]; tensor concat_9 = const()[name = string("concat_9"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_8, begin_mask = kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_9, end_mask = kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_5_stride_0, update = var_2111_cast_fp16, x = coreml_update_state_39)[name = string("kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_5_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_40_write_state")]; tensor coreml_update_state_40 = read_state(input = kv_cache_0)[name = string("coreml_update_state_40")]; tensor var_2118_axes_0 = const()[name = string("op_2118_axes_0"), val = tensor([0])]; tensor var_2118_cast_fp16 = squeeze(axes = var_2118_axes_0, x = V_new_5_cast_fp16)[name = string("op_2118_cast_fp16")]; tensor concat_10 = const()[name = string("concat_10"), val = tensor([20, 0, 0, 0])]; tensor concat_11 = const()[name = string("concat_11"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_10, begin_mask = kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_11, end_mask = kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_6_stride_0, update = var_2118_cast_fp16, x = coreml_update_state_40)[name = string("kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_6_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_41_write_state")]; tensor coreml_update_state_41 = read_state(input = kv_cache_0)[name = string("coreml_update_state_41")]; tensor hidden_states_19_axes_0 = const()[name = string("hidden_states_19_axes_0"), val = tensor([2])]; tensor hidden_states_19_cast_fp16 = expand_dims(axes = hidden_states_19_axes_0, x = K_new_5_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; tensor var_2131 = const()[name = string("op_2131"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_21_cast_fp16 = tile(reps = var_2131, x = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor var_2137 = const()[name = string("op_2137"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_5_cast_fp16 = reshape(shape = var_2137, x = hidden_states_21_cast_fp16)[name = string("K_expanded_5_cast_fp16")]; tensor hidden_states_23_axes_0 = const()[name = string("hidden_states_23_axes_0"), val = tensor([2])]; tensor hidden_states_23_cast_fp16 = expand_dims(axes = hidden_states_23_axes_0, x = V_new_5_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor var_2146 = const()[name = string("op_2146"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_25_cast_fp16 = tile(reps = var_2146, x = hidden_states_23_cast_fp16)[name = string("hidden_states_25_cast_fp16")]; tensor var_2152 = const()[name = string("op_2152"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_5_cast_fp16 = reshape(shape = var_2152, x = hidden_states_25_cast_fp16)[name = string("V_expanded_5_cast_fp16")]; bool var_2167_transpose_x_1 = const()[name = string("op_2167_transpose_x_1"), val = bool(false)]; bool var_2167_transpose_y_1 = const()[name = string("op_2167_transpose_y_1"), val = bool(true)]; tensor var_2167_cast_fp16 = matmul(transpose_x = var_2167_transpose_x_1, transpose_y = var_2167_transpose_y_1, x = q_17, y = K_expanded_5_cast_fp16)[name = string("op_2167_cast_fp16")]; fp16 var_2168_to_fp16 = const()[name = string("op_2168_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_13_cast_fp16 = mul(x = var_2167_cast_fp16, y = var_2168_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("attn_weights_15_cast_fp16")]; int32 var_2177 = const()[name = string("op_2177"), val = int32(-1)]; tensor var_2179_cast_fp16 = softmax(axis = var_2177, x = attn_weights_15_cast_fp16)[name = string("op_2179_cast_fp16")]; bool var_2195_transpose_x_0 = const()[name = string("op_2195_transpose_x_0"), val = bool(false)]; bool var_2195_transpose_y_0 = const()[name = string("op_2195_transpose_y_0"), val = bool(false)]; tensor var_2195_cast_fp16 = matmul(transpose_x = var_2195_transpose_x_0, transpose_y = var_2195_transpose_y_0, x = var_2179_cast_fp16, y = V_expanded_5_cast_fp16)[name = string("op_2195_cast_fp16")]; tensor var_2205 = const()[name = string("op_2205"), val = tensor([0, 2, 1, 3])]; tensor var_2212 = const()[name = string("op_2212"), val = tensor([1, 1, -1])]; tensor var_2206 = transpose(perm = var_2205, x = var_2195_cast_fp16)[name = string("transpose_141")]; tensor attn_output_15 = reshape(shape = var_2212, x = var_2206)[name = string("attn_output_15")]; tensor var_2217 = const()[name = string("op_2217"), val = tensor([0, 2, 1])]; tensor squeeze_2 = const()[name = string("squeeze_2"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523524544)))]; string var_2233_pad_type_0 = const()[name = string("op_2233_pad_type_0"), val = string("valid")]; int32 var_2233_groups_0 = const()[name = string("op_2233_groups_0"), val = int32(1)]; tensor var_2233_strides_0 = const()[name = string("op_2233_strides_0"), val = tensor([1])]; tensor var_2233_pad_0 = const()[name = string("op_2233_pad_0"), val = tensor([0, 0])]; tensor var_2233_dilations_0 = const()[name = string("op_2233_dilations_0"), val = tensor([1])]; tensor var_2218 = transpose(perm = var_2217, x = attn_output_15)[name = string("transpose_140")]; tensor var_2233 = conv(dilations = var_2233_dilations_0, groups = var_2233_groups_0, pad = var_2233_pad_0, pad_type = var_2233_pad_type_0, strides = var_2233_strides_0, weight = squeeze_2, x = var_2218)[name = string("op_2233")]; tensor var_2237 = const()[name = string("op_2237"), val = tensor([0, 2, 1])]; int32 var_2244 = const()[name = string("op_2244"), val = int32(-1)]; fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_41 = transpose(perm = var_2237, x = var_2233)[name = string("transpose_139")]; tensor var_2250_cast_fp16 = mul(x = x_41, y = const_36_promoted_to_fp16)[name = string("op_2250_cast_fp16")]; bool input_51_interleave_0 = const()[name = string("input_51_interleave_0"), val = bool(false)]; tensor input_51_cast_fp16 = concat(axis = var_2244, interleave = input_51_interleave_0, values = (x_41, var_2250_cast_fp16))[name = string("input_51_cast_fp16")]; tensor normed_71_axes_0 = const()[name = string("normed_71_axes_0"), val = tensor([-1])]; fp16 var_2242_to_fp16 = const()[name = string("op_2242_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_71_cast_fp16 = layer_norm(axes = normed_71_axes_0, epsilon = var_2242_to_fp16, x = input_51_cast_fp16)[name = string("normed_71_cast_fp16")]; tensor var_2255_split_sizes_0 = const()[name = string("op_2255_split_sizes_0"), val = tensor([640, 640])]; int32 var_2255_axis_0 = const()[name = string("op_2255_axis_0"), val = int32(-1)]; tensor var_2255_cast_fp16_0, tensor var_2255_cast_fp16_1 = split(axis = var_2255_axis_0, split_sizes = var_2255_split_sizes_0, x = normed_71_cast_fp16)[name = string("op_2255_cast_fp16")]; tensor var_2259_to_fp16 = const()[name = string("op_2259_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524835328)))]; tensor out_31_cast_fp16 = mul(x = var_2255_cast_fp16_0, y = var_2259_to_fp16)[name = string("out_31_cast_fp16")]; tensor x_43_cast_fp16 = add(x = x_33_cast_fp16, y = out_31_cast_fp16)[name = string("x_43_cast_fp16")]; int32 var_2273 = const()[name = string("op_2273"), val = int32(-1)]; fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2279_cast_fp16 = mul(x = x_43_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2279_cast_fp16")]; bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; tensor input_53_cast_fp16 = concat(axis = var_2273, interleave = input_53_interleave_0, values = (x_43_cast_fp16, var_2279_cast_fp16))[name = string("input_53_cast_fp16")]; tensor normed_75_axes_0 = const()[name = string("normed_75_axes_0"), val = tensor([-1])]; fp16 var_2271_to_fp16 = const()[name = string("op_2271_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_75_cast_fp16 = layer_norm(axes = normed_75_axes_0, epsilon = var_2271_to_fp16, x = input_53_cast_fp16)[name = string("normed_75_cast_fp16")]; tensor var_2284_split_sizes_0 = const()[name = string("op_2284_split_sizes_0"), val = tensor([640, 640])]; int32 var_2284_axis_0 = const()[name = string("op_2284_axis_0"), val = int32(-1)]; tensor var_2284_cast_fp16_0, tensor var_2284_cast_fp16_1 = split(axis = var_2284_axis_0, split_sizes = var_2284_split_sizes_0, x = normed_75_cast_fp16)[name = string("op_2284_cast_fp16")]; tensor var_2288_to_fp16 = const()[name = string("op_2288_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524836672)))]; tensor out_33_cast_fp16 = mul(x = var_2284_cast_fp16_0, y = var_2288_to_fp16)[name = string("out_33_cast_fp16")]; tensor var_2302 = const()[name = string("op_2302"), val = tensor([0, 2, 1])]; tensor input_55_axes_0 = const()[name = string("input_55_axes_0"), val = tensor([2])]; tensor var_2303 = transpose(perm = var_2302, x = out_33_cast_fp16)[name = string("transpose_138")]; tensor input_55 = expand_dims(axes = input_55_axes_0, x = var_2303)[name = string("input_55")]; string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = string("valid")]; tensor gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor([1, 1])]; tensor gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_9_dilations_0 = const()[name = string("gate_9_dilations_0"), val = tensor([1, 1])]; int32 gate_9_groups_0 = const()[name = string("gate_9_groups_0"), val = int32(1)]; tensor gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = layers_2_mlp_gate_proj_weight, x = input_55)[name = string("gate_9")]; string up_5_pad_type_0 = const()[name = string("up_5_pad_type_0"), val = string("valid")]; tensor up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor([1, 1])]; tensor up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor([1, 1])]; int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)]; tensor up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = layers_2_mlp_up_proj_weight, x = input_55)[name = string("up_5")]; string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")]; tensor input_57 = mul(x = gate_11, y = up_5)[name = string("input_57")]; string mlp_out_9_pad_type_0 = const()[name = string("mlp_out_9_pad_type_0"), val = string("valid")]; tensor mlp_out_9_strides_0 = const()[name = string("mlp_out_9_strides_0"), val = tensor([1, 1])]; tensor mlp_out_9_pad_0 = const()[name = string("mlp_out_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_9_dilations_0 = const()[name = string("mlp_out_9_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_9_groups_0 = const()[name = string("mlp_out_9_groups_0"), val = int32(1)]; tensor mlp_out_9 = conv(dilations = mlp_out_9_dilations_0, groups = mlp_out_9_groups_0, pad = mlp_out_9_pad_0, pad_type = mlp_out_9_pad_type_0, strides = mlp_out_9_strides_0, weight = layers_2_mlp_down_proj_weight, x = input_57)[name = string("mlp_out_9")]; tensor var_2343_axes_0 = const()[name = string("op_2343_axes_0"), val = tensor([2])]; tensor var_2343 = squeeze(axes = var_2343_axes_0, x = mlp_out_9)[name = string("op_2343")]; tensor var_2347 = const()[name = string("op_2347"), val = tensor([0, 2, 1])]; int32 var_2354 = const()[name = string("op_2354"), val = int32(-1)]; fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_47 = transpose(perm = var_2347, x = var_2343)[name = string("transpose_137")]; tensor var_2360_cast_fp16 = mul(x = x_47, y = const_40_promoted_to_fp16)[name = string("op_2360_cast_fp16")]; bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; tensor input_59_cast_fp16 = concat(axis = var_2354, interleave = input_59_interleave_0, values = (x_47, var_2360_cast_fp16))[name = string("input_59_cast_fp16")]; tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; fp16 var_2352_to_fp16 = const()[name = string("op_2352_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_2352_to_fp16, x = input_59_cast_fp16)[name = string("normed_81_cast_fp16")]; tensor var_2365_split_sizes_0 = const()[name = string("op_2365_split_sizes_0"), val = tensor([640, 640])]; int32 var_2365_axis_0 = const()[name = string("op_2365_axis_0"), val = int32(-1)]; tensor var_2365_cast_fp16_0, tensor var_2365_cast_fp16_1 = split(axis = var_2365_axis_0, split_sizes = var_2365_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_2365_cast_fp16")]; tensor var_2369_to_fp16 = const()[name = string("op_2369_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524838016)))]; tensor out_35_cast_fp16 = mul(x = var_2365_cast_fp16_0, y = var_2369_to_fp16)[name = string("out_35_cast_fp16")]; tensor x_49_cast_fp16 = add(x = x_43_cast_fp16, y = out_35_cast_fp16)[name = string("x_49_cast_fp16")]; int32 var_2383 = const()[name = string("op_2383"), val = int32(-1)]; fp16 const_42_promoted_to_fp16 = const()[name = string("const_42_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2389_cast_fp16 = mul(x = x_49_cast_fp16, y = const_42_promoted_to_fp16)[name = string("op_2389_cast_fp16")]; bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; tensor input_61_cast_fp16 = concat(axis = var_2383, interleave = input_61_interleave_0, values = (x_49_cast_fp16, var_2389_cast_fp16))[name = string("input_61_cast_fp16")]; tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; fp16 var_2381_to_fp16 = const()[name = string("op_2381_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_2381_to_fp16, x = input_61_cast_fp16)[name = string("normed_85_cast_fp16")]; tensor var_2394_split_sizes_0 = const()[name = string("op_2394_split_sizes_0"), val = tensor([640, 640])]; int32 var_2394_axis_0 = const()[name = string("op_2394_axis_0"), val = int32(-1)]; tensor var_2394_cast_fp16_0, tensor var_2394_cast_fp16_1 = split(axis = var_2394_axis_0, split_sizes = var_2394_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_2394_cast_fp16")]; tensor var_2398_to_fp16 = const()[name = string("op_2398_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524839360)))]; tensor out_37_cast_fp16 = mul(x = var_2394_cast_fp16_0, y = var_2398_to_fp16)[name = string("out_37_cast_fp16")]; tensor var_2412 = const()[name = string("op_2412"), val = tensor([0, 2, 1])]; tensor input_63_axes_0 = const()[name = string("input_63_axes_0"), val = tensor([2])]; tensor var_2413 = transpose(perm = var_2412, x = out_37_cast_fp16)[name = string("transpose_136")]; tensor input_63 = expand_dims(axes = input_63_axes_0, x = var_2413)[name = string("input_63")]; string var_2426_pad_type_0 = const()[name = string("op_2426_pad_type_0"), val = string("valid")]; tensor var_2426_strides_0 = const()[name = string("op_2426_strides_0"), val = tensor([1, 1])]; tensor var_2426_pad_0 = const()[name = string("op_2426_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2426_dilations_0 = const()[name = string("op_2426_dilations_0"), val = tensor([1, 1])]; int32 var_2426_groups_0 = const()[name = string("op_2426_groups_0"), val = int32(1)]; tensor var_2426 = conv(dilations = var_2426_dilations_0, groups = var_2426_groups_0, pad = var_2426_pad_0, pad_type = var_2426_pad_type_0, strides = var_2426_strides_0, weight = layers_3_self_attn_q_proj_weight, x = input_63)[name = string("op_2426")]; tensor var_2431 = const()[name = string("op_2431"), val = tensor([1, 4, 256, 1])]; tensor var_2432 = reshape(shape = var_2431, x = var_2426)[name = string("op_2432")]; tensor var_2437 = const()[name = string("op_2437"), val = tensor([0, 1, 3, 2])]; tensor var_2442 = const()[name = string("op_2442"), val = tensor([1, 4, 256])]; tensor q_19 = transpose(perm = var_2437, x = var_2432)[name = string("transpose_135")]; tensor x_53 = reshape(shape = var_2442, x = q_19)[name = string("x_53")]; int32 var_2449 = const()[name = string("op_2449"), val = int32(-1)]; fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2455_cast_fp16 = mul(x = x_53, y = const_44_promoted_to_fp16)[name = string("op_2455_cast_fp16")]; bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; tensor input_65_cast_fp16 = concat(axis = var_2449, interleave = input_65_interleave_0, values = (x_53, var_2455_cast_fp16))[name = string("input_65_cast_fp16")]; tensor normed_91_axes_0 = const()[name = string("normed_91_axes_0"), val = tensor([-1])]; fp16 var_2447_to_fp16 = const()[name = string("op_2447_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_91_cast_fp16 = layer_norm(axes = normed_91_axes_0, epsilon = var_2447_to_fp16, x = input_65_cast_fp16)[name = string("normed_91_cast_fp16")]; tensor var_2460_split_sizes_0 = const()[name = string("op_2460_split_sizes_0"), val = tensor([256, 256])]; int32 var_2460_axis_0 = const()[name = string("op_2460_axis_0"), val = int32(-1)]; tensor var_2460_cast_fp16_0, tensor var_2460_cast_fp16_1 = split(axis = var_2460_axis_0, split_sizes = var_2460_split_sizes_0, x = normed_91_cast_fp16)[name = string("op_2460_cast_fp16")]; tensor var_2464_to_fp16 = const()[name = string("op_2464_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524840704)))]; tensor out_39_cast_fp16 = mul(x = var_2460_cast_fp16_0, y = var_2464_to_fp16)[name = string("out_39_cast_fp16")]; tensor var_2471 = const()[name = string("op_2471"), val = tensor([1, 4, 1, 256])]; tensor q_21 = reshape(shape = var_2471, x = out_39_cast_fp16)[name = string("q_21")]; string var_2483_pad_type_0 = const()[name = string("op_2483_pad_type_0"), val = string("valid")]; tensor var_2483_strides_0 = const()[name = string("op_2483_strides_0"), val = tensor([1, 1])]; tensor var_2483_pad_0 = const()[name = string("op_2483_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2483_dilations_0 = const()[name = string("op_2483_dilations_0"), val = tensor([1, 1])]; int32 var_2483_groups_0 = const()[name = string("op_2483_groups_0"), val = int32(1)]; tensor var_2483 = conv(dilations = var_2483_dilations_0, groups = var_2483_groups_0, pad = var_2483_pad_0, pad_type = var_2483_pad_type_0, strides = var_2483_strides_0, weight = layers_3_self_attn_k_proj_weight, x = input_63)[name = string("op_2483")]; tensor var_2488 = const()[name = string("op_2488"), val = tensor([1, 1, 256, 1])]; tensor var_2489 = reshape(shape = var_2488, x = var_2483)[name = string("op_2489")]; tensor var_2494 = const()[name = string("op_2494"), val = tensor([0, 1, 3, 2])]; tensor var_2499 = const()[name = string("op_2499"), val = tensor([1, 1, 256])]; tensor k_19 = transpose(perm = var_2494, x = var_2489)[name = string("transpose_134")]; tensor x_55 = reshape(shape = var_2499, x = k_19)[name = string("x_55")]; int32 var_2506 = const()[name = string("op_2506"), val = int32(-1)]; fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2512_cast_fp16 = mul(x = x_55, y = const_46_promoted_to_fp16)[name = string("op_2512_cast_fp16")]; bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)]; tensor input_67_cast_fp16 = concat(axis = var_2506, interleave = input_67_interleave_0, values = (x_55, var_2512_cast_fp16))[name = string("input_67_cast_fp16")]; tensor normed_95_axes_0 = const()[name = string("normed_95_axes_0"), val = tensor([-1])]; fp16 var_2504_to_fp16 = const()[name = string("op_2504_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_95_cast_fp16 = layer_norm(axes = normed_95_axes_0, epsilon = var_2504_to_fp16, x = input_67_cast_fp16)[name = string("normed_95_cast_fp16")]; tensor var_2517_split_sizes_0 = const()[name = string("op_2517_split_sizes_0"), val = tensor([256, 256])]; int32 var_2517_axis_0 = const()[name = string("op_2517_axis_0"), val = int32(-1)]; tensor var_2517_cast_fp16_0, tensor var_2517_cast_fp16_1 = split(axis = var_2517_axis_0, split_sizes = var_2517_split_sizes_0, x = normed_95_cast_fp16)[name = string("op_2517_cast_fp16")]; tensor var_2521_to_fp16 = const()[name = string("op_2521_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524841280)))]; tensor out_41_cast_fp16 = mul(x = var_2517_cast_fp16_0, y = var_2521_to_fp16)[name = string("out_41_cast_fp16")]; tensor var_2528 = const()[name = string("op_2528"), val = tensor([1, 1, 1, 256])]; tensor k_21 = reshape(shape = var_2528, x = out_41_cast_fp16)[name = string("k_21")]; string var_2540_pad_type_0 = const()[name = string("op_2540_pad_type_0"), val = string("valid")]; tensor var_2540_strides_0 = const()[name = string("op_2540_strides_0"), val = tensor([1, 1])]; tensor var_2540_pad_0 = const()[name = string("op_2540_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2540_dilations_0 = const()[name = string("op_2540_dilations_0"), val = tensor([1, 1])]; int32 var_2540_groups_0 = const()[name = string("op_2540_groups_0"), val = int32(1)]; tensor var_2540 = conv(dilations = var_2540_dilations_0, groups = var_2540_groups_0, pad = var_2540_pad_0, pad_type = var_2540_pad_type_0, strides = var_2540_strides_0, weight = layers_3_self_attn_v_proj_weight, x = input_63)[name = string("op_2540")]; tensor var_2545 = const()[name = string("op_2545"), val = tensor([1, 1, 256, 1])]; tensor var_2546 = reshape(shape = var_2545, x = var_2540)[name = string("op_2546")]; tensor var_2551 = const()[name = string("op_2551"), val = tensor([0, 1, 3, 2])]; tensor var_2553 = mul(x = q_21, y = cos_1)[name = string("op_2553")]; tensor var_2554_split_sizes_0 = const()[name = string("op_2554_split_sizes_0"), val = tensor([128, 128])]; int32 var_2554_axis_0 = const()[name = string("op_2554_axis_0"), val = int32(-1)]; tensor var_2554_0, tensor var_2554_1 = split(axis = var_2554_axis_0, split_sizes = var_2554_split_sizes_0, x = q_21)[name = string("op_2554")]; fp16 const_48_promoted = const()[name = string("const_48_promoted"), val = fp16(-0x1p+0)]; tensor var_2556 = mul(x = var_2554_1, y = const_48_promoted)[name = string("op_2556")]; int32 var_2558 = const()[name = string("op_2558"), val = int32(-1)]; bool var_2559_interleave_0 = const()[name = string("op_2559_interleave_0"), val = bool(false)]; tensor var_2559 = concat(axis = var_2558, interleave = var_2559_interleave_0, values = (var_2556, var_2554_0))[name = string("op_2559")]; tensor var_2560 = mul(x = var_2559, y = sin_1)[name = string("op_2560")]; tensor q_23 = add(x = var_2553, y = var_2560)[name = string("q_23")]; tensor var_2563 = mul(x = k_21, y = cos_1)[name = string("op_2563")]; tensor var_2564_split_sizes_0 = const()[name = string("op_2564_split_sizes_0"), val = tensor([128, 128])]; int32 var_2564_axis_0 = const()[name = string("op_2564_axis_0"), val = int32(-1)]; tensor var_2564_0, tensor var_2564_1 = split(axis = var_2564_axis_0, split_sizes = var_2564_split_sizes_0, x = k_21)[name = string("op_2564")]; fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)]; tensor var_2566 = mul(x = var_2564_1, y = const_49_promoted)[name = string("op_2566")]; int32 var_2568 = const()[name = string("op_2568"), val = int32(-1)]; bool var_2569_interleave_0 = const()[name = string("op_2569_interleave_0"), val = bool(false)]; tensor var_2569 = concat(axis = var_2568, interleave = var_2569_interleave_0, values = (var_2566, var_2564_0))[name = string("op_2569")]; tensor var_2570 = mul(x = var_2569, y = sin_1)[name = string("op_2570")]; tensor k_23 = add(x = var_2563, y = var_2570)[name = string("k_23")]; tensor var_2575_begin_0 = const()[name = string("op_2575_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_2575_end_0 = const()[name = string("op_2575_end_0"), val = tensor([4, 1, 2048, 256])]; tensor var_2575_end_mask_0 = const()[name = string("op_2575_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2575_squeeze_mask_0 = const()[name = string("op_2575_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_2575_cast_fp16 = slice_by_index(begin = var_2575_begin_0, end = var_2575_end_0, end_mask = var_2575_end_mask_0, squeeze_mask = var_2575_squeeze_mask_0, x = coreml_update_state_41)[name = string("op_2575_cast_fp16")]; tensor K_cache_7_axes_0 = const()[name = string("K_cache_7_axes_0"), val = tensor([0])]; tensor K_cache_7_cast_fp16 = expand_dims(axes = K_cache_7_axes_0, x = var_2575_cast_fp16)[name = string("K_cache_7_cast_fp16")]; tensor var_2580_begin_0 = const()[name = string("op_2580_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_2580_end_0 = const()[name = string("op_2580_end_0"), val = tensor([22, 1, 2048, 256])]; tensor var_2580_end_mask_0 = const()[name = string("op_2580_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2580_squeeze_mask_0 = const()[name = string("op_2580_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_2580_cast_fp16 = slice_by_index(begin = var_2580_begin_0, end = var_2580_end_0, end_mask = var_2580_end_mask_0, squeeze_mask = var_2580_squeeze_mask_0, x = coreml_update_state_41)[name = string("op_2580_cast_fp16")]; tensor V_cache_7_axes_0 = const()[name = string("V_cache_7_axes_0"), val = tensor([0])]; tensor V_cache_7_cast_fp16 = expand_dims(axes = V_cache_7_axes_0, x = var_2580_cast_fp16)[name = string("V_cache_7_cast_fp16")]; tensor k_broadcast_7_reps_0 = const()[name = string("k_broadcast_7_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_7 = tile(reps = k_broadcast_7_reps_0, x = k_23)[name = string("k_broadcast_7")]; tensor v_broadcast_7_reps_0 = const()[name = string("v_broadcast_7_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_7 = transpose(perm = var_2551, x = var_2546)[name = string("transpose_133")]; tensor v_broadcast_7 = tile(reps = v_broadcast_7_reps_0, x = v_7)[name = string("v_broadcast_7")]; tensor var_2588_cast_fp16 = mul(x = K_cache_7_cast_fp16, y = var_1120_cast_fp16)[name = string("op_2588_cast_fp16")]; tensor var_2589_cast_fp16 = mul(x = k_broadcast_7, y = update_mask)[name = string("op_2589_cast_fp16")]; tensor K_new_7_cast_fp16 = add(x = var_2588_cast_fp16, y = var_2589_cast_fp16)[name = string("K_new_7_cast_fp16")]; tensor var_2595_cast_fp16 = mul(x = V_cache_7_cast_fp16, y = var_1120_cast_fp16)[name = string("op_2595_cast_fp16")]; tensor var_2596_cast_fp16 = mul(x = v_broadcast_7, y = update_mask)[name = string("op_2596_cast_fp16")]; tensor V_new_7_cast_fp16 = add(x = var_2595_cast_fp16, y = var_2596_cast_fp16)[name = string("V_new_7_cast_fp16")]; tensor var_2600_axes_0 = const()[name = string("op_2600_axes_0"), val = tensor([0])]; tensor var_2600_cast_fp16 = squeeze(axes = var_2600_axes_0, x = K_new_7_cast_fp16)[name = string("op_2600_cast_fp16")]; tensor concat_12 = const()[name = string("concat_12"), val = tensor([3, 0, 0, 0])]; tensor concat_13 = const()[name = string("concat_13"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_12, begin_mask = kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_13, end_mask = kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_7_stride_0, update = var_2600_cast_fp16, x = coreml_update_state_41)[name = string("kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_7_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_42_write_state")]; tensor coreml_update_state_42 = read_state(input = kv_cache_0)[name = string("coreml_update_state_42")]; tensor var_2607_axes_0 = const()[name = string("op_2607_axes_0"), val = tensor([0])]; tensor var_2607_cast_fp16 = squeeze(axes = var_2607_axes_0, x = V_new_7_cast_fp16)[name = string("op_2607_cast_fp16")]; tensor concat_14 = const()[name = string("concat_14"), val = tensor([21, 0, 0, 0])]; tensor concat_15 = const()[name = string("concat_15"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_14, begin_mask = kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_15, end_mask = kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_8_stride_0, update = var_2607_cast_fp16, x = coreml_update_state_42)[name = string("kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_8_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_43_write_state")]; tensor coreml_update_state_43 = read_state(input = kv_cache_0)[name = string("coreml_update_state_43")]; tensor hidden_states_27_axes_0 = const()[name = string("hidden_states_27_axes_0"), val = tensor([2])]; tensor hidden_states_27_cast_fp16 = expand_dims(axes = hidden_states_27_axes_0, x = K_new_7_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; tensor var_2620 = const()[name = string("op_2620"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_29_cast_fp16 = tile(reps = var_2620, x = hidden_states_27_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor var_2626 = const()[name = string("op_2626"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_7_cast_fp16 = reshape(shape = var_2626, x = hidden_states_29_cast_fp16)[name = string("K_expanded_7_cast_fp16")]; tensor hidden_states_31_axes_0 = const()[name = string("hidden_states_31_axes_0"), val = tensor([2])]; tensor hidden_states_31_cast_fp16 = expand_dims(axes = hidden_states_31_axes_0, x = V_new_7_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; tensor var_2635 = const()[name = string("op_2635"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_33_cast_fp16 = tile(reps = var_2635, x = hidden_states_31_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; tensor var_2641 = const()[name = string("op_2641"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_7_cast_fp16 = reshape(shape = var_2641, x = hidden_states_33_cast_fp16)[name = string("V_expanded_7_cast_fp16")]; bool var_2656_transpose_x_1 = const()[name = string("op_2656_transpose_x_1"), val = bool(false)]; bool var_2656_transpose_y_1 = const()[name = string("op_2656_transpose_y_1"), val = bool(true)]; tensor var_2656_cast_fp16 = matmul(transpose_x = var_2656_transpose_x_1, transpose_y = var_2656_transpose_y_1, x = q_23, y = K_expanded_7_cast_fp16)[name = string("op_2656_cast_fp16")]; fp16 var_2657_to_fp16 = const()[name = string("op_2657_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_19_cast_fp16 = mul(x = var_2656_cast_fp16, y = var_2657_to_fp16)[name = string("attn_weights_19_cast_fp16")]; tensor attn_weights_21_cast_fp16 = add(x = attn_weights_19_cast_fp16, y = causal_mask)[name = string("attn_weights_21_cast_fp16")]; int32 var_2666 = const()[name = string("op_2666"), val = int32(-1)]; tensor var_2668_cast_fp16 = softmax(axis = var_2666, x = attn_weights_21_cast_fp16)[name = string("op_2668_cast_fp16")]; bool var_2684_transpose_x_0 = const()[name = string("op_2684_transpose_x_0"), val = bool(false)]; bool var_2684_transpose_y_0 = const()[name = string("op_2684_transpose_y_0"), val = bool(false)]; tensor var_2684_cast_fp16 = matmul(transpose_x = var_2684_transpose_x_0, transpose_y = var_2684_transpose_y_0, x = var_2668_cast_fp16, y = V_expanded_7_cast_fp16)[name = string("op_2684_cast_fp16")]; tensor var_2694 = const()[name = string("op_2694"), val = tensor([0, 2, 1, 3])]; tensor var_2701 = const()[name = string("op_2701"), val = tensor([1, 1, -1])]; tensor var_2695 = transpose(perm = var_2694, x = var_2684_cast_fp16)[name = string("transpose_132")]; tensor attn_output_21 = reshape(shape = var_2701, x = var_2695)[name = string("attn_output_21")]; tensor var_2706 = const()[name = string("op_2706"), val = tensor([0, 2, 1])]; tensor squeeze_3 = const()[name = string("squeeze_3"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524841856)))]; string var_2722_pad_type_0 = const()[name = string("op_2722_pad_type_0"), val = string("valid")]; int32 var_2722_groups_0 = const()[name = string("op_2722_groups_0"), val = int32(1)]; tensor var_2722_strides_0 = const()[name = string("op_2722_strides_0"), val = tensor([1])]; tensor var_2722_pad_0 = const()[name = string("op_2722_pad_0"), val = tensor([0, 0])]; tensor var_2722_dilations_0 = const()[name = string("op_2722_dilations_0"), val = tensor([1])]; tensor var_2707 = transpose(perm = var_2706, x = attn_output_21)[name = string("transpose_131")]; tensor var_2722 = conv(dilations = var_2722_dilations_0, groups = var_2722_groups_0, pad = var_2722_pad_0, pad_type = var_2722_pad_type_0, strides = var_2722_strides_0, weight = squeeze_3, x = var_2707)[name = string("op_2722")]; tensor var_2726 = const()[name = string("op_2726"), val = tensor([0, 2, 1])]; int32 var_2733 = const()[name = string("op_2733"), val = int32(-1)]; fp16 const_50_promoted_to_fp16 = const()[name = string("const_50_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_57 = transpose(perm = var_2726, x = var_2722)[name = string("transpose_130")]; tensor var_2739_cast_fp16 = mul(x = x_57, y = const_50_promoted_to_fp16)[name = string("op_2739_cast_fp16")]; bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; tensor input_71_cast_fp16 = concat(axis = var_2733, interleave = input_71_interleave_0, values = (x_57, var_2739_cast_fp16))[name = string("input_71_cast_fp16")]; tensor normed_99_axes_0 = const()[name = string("normed_99_axes_0"), val = tensor([-1])]; fp16 var_2731_to_fp16 = const()[name = string("op_2731_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_99_cast_fp16 = layer_norm(axes = normed_99_axes_0, epsilon = var_2731_to_fp16, x = input_71_cast_fp16)[name = string("normed_99_cast_fp16")]; tensor var_2744_split_sizes_0 = const()[name = string("op_2744_split_sizes_0"), val = tensor([640, 640])]; int32 var_2744_axis_0 = const()[name = string("op_2744_axis_0"), val = int32(-1)]; tensor var_2744_cast_fp16_0, tensor var_2744_cast_fp16_1 = split(axis = var_2744_axis_0, split_sizes = var_2744_split_sizes_0, x = normed_99_cast_fp16)[name = string("op_2744_cast_fp16")]; tensor var_2748_to_fp16 = const()[name = string("op_2748_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526152640)))]; tensor out_43_cast_fp16 = mul(x = var_2744_cast_fp16_0, y = var_2748_to_fp16)[name = string("out_43_cast_fp16")]; tensor x_59_cast_fp16 = add(x = x_49_cast_fp16, y = out_43_cast_fp16)[name = string("x_59_cast_fp16")]; int32 var_2762 = const()[name = string("op_2762"), val = int32(-1)]; fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2768_cast_fp16 = mul(x = x_59_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_2768_cast_fp16")]; bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; tensor input_73_cast_fp16 = concat(axis = var_2762, interleave = input_73_interleave_0, values = (x_59_cast_fp16, var_2768_cast_fp16))[name = string("input_73_cast_fp16")]; tensor normed_103_axes_0 = const()[name = string("normed_103_axes_0"), val = tensor([-1])]; fp16 var_2760_to_fp16 = const()[name = string("op_2760_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_103_cast_fp16 = layer_norm(axes = normed_103_axes_0, epsilon = var_2760_to_fp16, x = input_73_cast_fp16)[name = string("normed_103_cast_fp16")]; tensor var_2773_split_sizes_0 = const()[name = string("op_2773_split_sizes_0"), val = tensor([640, 640])]; int32 var_2773_axis_0 = const()[name = string("op_2773_axis_0"), val = int32(-1)]; tensor var_2773_cast_fp16_0, tensor var_2773_cast_fp16_1 = split(axis = var_2773_axis_0, split_sizes = var_2773_split_sizes_0, x = normed_103_cast_fp16)[name = string("op_2773_cast_fp16")]; tensor var_2777_to_fp16 = const()[name = string("op_2777_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526153984)))]; tensor out_45_cast_fp16 = mul(x = var_2773_cast_fp16_0, y = var_2777_to_fp16)[name = string("out_45_cast_fp16")]; tensor var_2791 = const()[name = string("op_2791"), val = tensor([0, 2, 1])]; tensor input_75_axes_0 = const()[name = string("input_75_axes_0"), val = tensor([2])]; tensor var_2792 = transpose(perm = var_2791, x = out_45_cast_fp16)[name = string("transpose_129")]; tensor input_75 = expand_dims(axes = input_75_axes_0, x = var_2792)[name = string("input_75")]; string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")]; tensor gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor([1, 1])]; tensor gate_13_pad_0 = const()[name = string("gate_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor([1, 1])]; int32 gate_13_groups_0 = const()[name = string("gate_13_groups_0"), val = int32(1)]; tensor gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = layers_3_mlp_gate_proj_weight, x = input_75)[name = string("gate_13")]; string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")]; tensor up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor([1, 1])]; tensor up_7_pad_0 = const()[name = string("up_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor([1, 1])]; int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)]; tensor up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = layers_3_mlp_up_proj_weight, x = input_75)[name = string("up_7")]; string gate_15_mode_0 = const()[name = string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")]; tensor input_77 = mul(x = gate_15, y = up_7)[name = string("input_77")]; string mlp_out_13_pad_type_0 = const()[name = string("mlp_out_13_pad_type_0"), val = string("valid")]; tensor mlp_out_13_strides_0 = const()[name = string("mlp_out_13_strides_0"), val = tensor([1, 1])]; tensor mlp_out_13_pad_0 = const()[name = string("mlp_out_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_13_dilations_0 = const()[name = string("mlp_out_13_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_13_groups_0 = const()[name = string("mlp_out_13_groups_0"), val = int32(1)]; tensor mlp_out_13 = conv(dilations = mlp_out_13_dilations_0, groups = mlp_out_13_groups_0, pad = mlp_out_13_pad_0, pad_type = mlp_out_13_pad_type_0, strides = mlp_out_13_strides_0, weight = layers_3_mlp_down_proj_weight, x = input_77)[name = string("mlp_out_13")]; tensor var_2832_axes_0 = const()[name = string("op_2832_axes_0"), val = tensor([2])]; tensor var_2832 = squeeze(axes = var_2832_axes_0, x = mlp_out_13)[name = string("op_2832")]; tensor var_2836 = const()[name = string("op_2836"), val = tensor([0, 2, 1])]; int32 var_2843 = const()[name = string("op_2843"), val = int32(-1)]; fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_63 = transpose(perm = var_2836, x = var_2832)[name = string("transpose_128")]; tensor var_2849_cast_fp16 = mul(x = x_63, y = const_54_promoted_to_fp16)[name = string("op_2849_cast_fp16")]; bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; tensor input_79_cast_fp16 = concat(axis = var_2843, interleave = input_79_interleave_0, values = (x_63, var_2849_cast_fp16))[name = string("input_79_cast_fp16")]; tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; fp16 var_2841_to_fp16 = const()[name = string("op_2841_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_2841_to_fp16, x = input_79_cast_fp16)[name = string("normed_109_cast_fp16")]; tensor var_2854_split_sizes_0 = const()[name = string("op_2854_split_sizes_0"), val = tensor([640, 640])]; int32 var_2854_axis_0 = const()[name = string("op_2854_axis_0"), val = int32(-1)]; tensor var_2854_cast_fp16_0, tensor var_2854_cast_fp16_1 = split(axis = var_2854_axis_0, split_sizes = var_2854_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_2854_cast_fp16")]; tensor var_2858_to_fp16 = const()[name = string("op_2858_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526155328)))]; tensor out_47_cast_fp16 = mul(x = var_2854_cast_fp16_0, y = var_2858_to_fp16)[name = string("out_47_cast_fp16")]; tensor x_65_cast_fp16 = add(x = x_59_cast_fp16, y = out_47_cast_fp16)[name = string("x_65_cast_fp16")]; int32 var_2872 = const()[name = string("op_2872"), val = int32(-1)]; fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2878_cast_fp16 = mul(x = x_65_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_2878_cast_fp16")]; bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; tensor input_81_cast_fp16 = concat(axis = var_2872, interleave = input_81_interleave_0, values = (x_65_cast_fp16, var_2878_cast_fp16))[name = string("input_81_cast_fp16")]; tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; fp16 var_2870_to_fp16 = const()[name = string("op_2870_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_2870_to_fp16, x = input_81_cast_fp16)[name = string("normed_113_cast_fp16")]; tensor var_2883_split_sizes_0 = const()[name = string("op_2883_split_sizes_0"), val = tensor([640, 640])]; int32 var_2883_axis_0 = const()[name = string("op_2883_axis_0"), val = int32(-1)]; tensor var_2883_cast_fp16_0, tensor var_2883_cast_fp16_1 = split(axis = var_2883_axis_0, split_sizes = var_2883_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_2883_cast_fp16")]; tensor var_2887_to_fp16 = const()[name = string("op_2887_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526156672)))]; tensor out_49_cast_fp16 = mul(x = var_2883_cast_fp16_0, y = var_2887_to_fp16)[name = string("out_49_cast_fp16")]; tensor var_2901 = const()[name = string("op_2901"), val = tensor([0, 2, 1])]; tensor input_83_axes_0 = const()[name = string("input_83_axes_0"), val = tensor([2])]; tensor var_2902 = transpose(perm = var_2901, x = out_49_cast_fp16)[name = string("transpose_127")]; tensor input_83 = expand_dims(axes = input_83_axes_0, x = var_2902)[name = string("input_83")]; string var_2915_pad_type_0 = const()[name = string("op_2915_pad_type_0"), val = string("valid")]; tensor var_2915_strides_0 = const()[name = string("op_2915_strides_0"), val = tensor([1, 1])]; tensor var_2915_pad_0 = const()[name = string("op_2915_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2915_dilations_0 = const()[name = string("op_2915_dilations_0"), val = tensor([1, 1])]; int32 var_2915_groups_0 = const()[name = string("op_2915_groups_0"), val = int32(1)]; tensor var_2915 = conv(dilations = var_2915_dilations_0, groups = var_2915_groups_0, pad = var_2915_pad_0, pad_type = var_2915_pad_type_0, strides = var_2915_strides_0, weight = layers_4_self_attn_q_proj_weight, x = input_83)[name = string("op_2915")]; tensor var_2920 = const()[name = string("op_2920"), val = tensor([1, 4, 256, 1])]; tensor var_2921 = reshape(shape = var_2920, x = var_2915)[name = string("op_2921")]; tensor var_2926 = const()[name = string("op_2926"), val = tensor([0, 1, 3, 2])]; tensor var_2931 = const()[name = string("op_2931"), val = tensor([1, 4, 256])]; tensor q_25 = transpose(perm = var_2926, x = var_2921)[name = string("transpose_126")]; tensor x_69 = reshape(shape = var_2931, x = q_25)[name = string("x_69")]; int32 var_2938 = const()[name = string("op_2938"), val = int32(-1)]; fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2944_cast_fp16 = mul(x = x_69, y = const_58_promoted_to_fp16)[name = string("op_2944_cast_fp16")]; bool input_85_interleave_0 = const()[name = string("input_85_interleave_0"), val = bool(false)]; tensor input_85_cast_fp16 = concat(axis = var_2938, interleave = input_85_interleave_0, values = (x_69, var_2944_cast_fp16))[name = string("input_85_cast_fp16")]; tensor normed_119_axes_0 = const()[name = string("normed_119_axes_0"), val = tensor([-1])]; fp16 var_2936_to_fp16 = const()[name = string("op_2936_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_119_cast_fp16 = layer_norm(axes = normed_119_axes_0, epsilon = var_2936_to_fp16, x = input_85_cast_fp16)[name = string("normed_119_cast_fp16")]; tensor var_2949_split_sizes_0 = const()[name = string("op_2949_split_sizes_0"), val = tensor([256, 256])]; int32 var_2949_axis_0 = const()[name = string("op_2949_axis_0"), val = int32(-1)]; tensor var_2949_cast_fp16_0, tensor var_2949_cast_fp16_1 = split(axis = var_2949_axis_0, split_sizes = var_2949_split_sizes_0, x = normed_119_cast_fp16)[name = string("op_2949_cast_fp16")]; tensor var_2953_to_fp16 = const()[name = string("op_2953_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526158016)))]; tensor out_51_cast_fp16 = mul(x = var_2949_cast_fp16_0, y = var_2953_to_fp16)[name = string("out_51_cast_fp16")]; tensor var_2960 = const()[name = string("op_2960"), val = tensor([1, 4, 1, 256])]; tensor q_27 = reshape(shape = var_2960, x = out_51_cast_fp16)[name = string("q_27")]; string var_2972_pad_type_0 = const()[name = string("op_2972_pad_type_0"), val = string("valid")]; tensor var_2972_strides_0 = const()[name = string("op_2972_strides_0"), val = tensor([1, 1])]; tensor var_2972_pad_0 = const()[name = string("op_2972_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2972_dilations_0 = const()[name = string("op_2972_dilations_0"), val = tensor([1, 1])]; int32 var_2972_groups_0 = const()[name = string("op_2972_groups_0"), val = int32(1)]; tensor var_2972 = conv(dilations = var_2972_dilations_0, groups = var_2972_groups_0, pad = var_2972_pad_0, pad_type = var_2972_pad_type_0, strides = var_2972_strides_0, weight = layers_4_self_attn_k_proj_weight, x = input_83)[name = string("op_2972")]; tensor var_2977 = const()[name = string("op_2977"), val = tensor([1, 1, 256, 1])]; tensor var_2978 = reshape(shape = var_2977, x = var_2972)[name = string("op_2978")]; tensor var_2983 = const()[name = string("op_2983"), val = tensor([0, 1, 3, 2])]; tensor var_2988 = const()[name = string("op_2988"), val = tensor([1, 1, 256])]; tensor k_25 = transpose(perm = var_2983, x = var_2978)[name = string("transpose_125")]; tensor x_71 = reshape(shape = var_2988, x = k_25)[name = string("x_71")]; int32 var_2995 = const()[name = string("op_2995"), val = int32(-1)]; fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3001_cast_fp16 = mul(x = x_71, y = const_60_promoted_to_fp16)[name = string("op_3001_cast_fp16")]; bool input_87_interleave_0 = const()[name = string("input_87_interleave_0"), val = bool(false)]; tensor input_87_cast_fp16 = concat(axis = var_2995, interleave = input_87_interleave_0, values = (x_71, var_3001_cast_fp16))[name = string("input_87_cast_fp16")]; tensor normed_123_axes_0 = const()[name = string("normed_123_axes_0"), val = tensor([-1])]; fp16 var_2993_to_fp16 = const()[name = string("op_2993_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_123_cast_fp16 = layer_norm(axes = normed_123_axes_0, epsilon = var_2993_to_fp16, x = input_87_cast_fp16)[name = string("normed_123_cast_fp16")]; tensor var_3006_split_sizes_0 = const()[name = string("op_3006_split_sizes_0"), val = tensor([256, 256])]; int32 var_3006_axis_0 = const()[name = string("op_3006_axis_0"), val = int32(-1)]; tensor var_3006_cast_fp16_0, tensor var_3006_cast_fp16_1 = split(axis = var_3006_axis_0, split_sizes = var_3006_split_sizes_0, x = normed_123_cast_fp16)[name = string("op_3006_cast_fp16")]; tensor var_3010_to_fp16 = const()[name = string("op_3010_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526158592)))]; tensor out_53_cast_fp16 = mul(x = var_3006_cast_fp16_0, y = var_3010_to_fp16)[name = string("out_53_cast_fp16")]; tensor var_3017 = const()[name = string("op_3017"), val = tensor([1, 1, 1, 256])]; tensor k_27 = reshape(shape = var_3017, x = out_53_cast_fp16)[name = string("k_27")]; string var_3029_pad_type_0 = const()[name = string("op_3029_pad_type_0"), val = string("valid")]; tensor var_3029_strides_0 = const()[name = string("op_3029_strides_0"), val = tensor([1, 1])]; tensor var_3029_pad_0 = const()[name = string("op_3029_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3029_dilations_0 = const()[name = string("op_3029_dilations_0"), val = tensor([1, 1])]; int32 var_3029_groups_0 = const()[name = string("op_3029_groups_0"), val = int32(1)]; tensor var_3029 = conv(dilations = var_3029_dilations_0, groups = var_3029_groups_0, pad = var_3029_pad_0, pad_type = var_3029_pad_type_0, strides = var_3029_strides_0, weight = layers_4_self_attn_v_proj_weight, x = input_83)[name = string("op_3029")]; tensor var_3034 = const()[name = string("op_3034"), val = tensor([1, 1, 256, 1])]; tensor var_3035 = reshape(shape = var_3034, x = var_3029)[name = string("op_3035")]; tensor var_3040 = const()[name = string("op_3040"), val = tensor([0, 1, 3, 2])]; tensor var_3042 = mul(x = q_27, y = cos_1)[name = string("op_3042")]; tensor var_3043_split_sizes_0 = const()[name = string("op_3043_split_sizes_0"), val = tensor([128, 128])]; int32 var_3043_axis_0 = const()[name = string("op_3043_axis_0"), val = int32(-1)]; tensor var_3043_0, tensor var_3043_1 = split(axis = var_3043_axis_0, split_sizes = var_3043_split_sizes_0, x = q_27)[name = string("op_3043")]; fp16 const_62_promoted = const()[name = string("const_62_promoted"), val = fp16(-0x1p+0)]; tensor var_3045 = mul(x = var_3043_1, y = const_62_promoted)[name = string("op_3045")]; int32 var_3047 = const()[name = string("op_3047"), val = int32(-1)]; bool var_3048_interleave_0 = const()[name = string("op_3048_interleave_0"), val = bool(false)]; tensor var_3048 = concat(axis = var_3047, interleave = var_3048_interleave_0, values = (var_3045, var_3043_0))[name = string("op_3048")]; tensor var_3049 = mul(x = var_3048, y = sin_1)[name = string("op_3049")]; tensor q_29 = add(x = var_3042, y = var_3049)[name = string("q_29")]; tensor var_3052 = mul(x = k_27, y = cos_1)[name = string("op_3052")]; tensor var_3053_split_sizes_0 = const()[name = string("op_3053_split_sizes_0"), val = tensor([128, 128])]; int32 var_3053_axis_0 = const()[name = string("op_3053_axis_0"), val = int32(-1)]; tensor var_3053_0, tensor var_3053_1 = split(axis = var_3053_axis_0, split_sizes = var_3053_split_sizes_0, x = k_27)[name = string("op_3053")]; fp16 const_63_promoted = const()[name = string("const_63_promoted"), val = fp16(-0x1p+0)]; tensor var_3055 = mul(x = var_3053_1, y = const_63_promoted)[name = string("op_3055")]; int32 var_3057 = const()[name = string("op_3057"), val = int32(-1)]; bool var_3058_interleave_0 = const()[name = string("op_3058_interleave_0"), val = bool(false)]; tensor var_3058 = concat(axis = var_3057, interleave = var_3058_interleave_0, values = (var_3055, var_3053_0))[name = string("op_3058")]; tensor var_3059 = mul(x = var_3058, y = sin_1)[name = string("op_3059")]; tensor k_29 = add(x = var_3052, y = var_3059)[name = string("k_29")]; tensor var_3064_begin_0 = const()[name = string("op_3064_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_3064_end_0 = const()[name = string("op_3064_end_0"), val = tensor([5, 1, 2048, 256])]; tensor var_3064_end_mask_0 = const()[name = string("op_3064_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3064_squeeze_mask_0 = const()[name = string("op_3064_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_3064_cast_fp16 = slice_by_index(begin = var_3064_begin_0, end = var_3064_end_0, end_mask = var_3064_end_mask_0, squeeze_mask = var_3064_squeeze_mask_0, x = coreml_update_state_43)[name = string("op_3064_cast_fp16")]; tensor K_cache_9_axes_0 = const()[name = string("K_cache_9_axes_0"), val = tensor([0])]; tensor K_cache_9_cast_fp16 = expand_dims(axes = K_cache_9_axes_0, x = var_3064_cast_fp16)[name = string("K_cache_9_cast_fp16")]; tensor var_3069_begin_0 = const()[name = string("op_3069_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_3069_end_0 = const()[name = string("op_3069_end_0"), val = tensor([23, 1, 2048, 256])]; tensor var_3069_end_mask_0 = const()[name = string("op_3069_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3069_squeeze_mask_0 = const()[name = string("op_3069_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_3069_cast_fp16 = slice_by_index(begin = var_3069_begin_0, end = var_3069_end_0, end_mask = var_3069_end_mask_0, squeeze_mask = var_3069_squeeze_mask_0, x = coreml_update_state_43)[name = string("op_3069_cast_fp16")]; tensor V_cache_9_axes_0 = const()[name = string("V_cache_9_axes_0"), val = tensor([0])]; tensor V_cache_9_cast_fp16 = expand_dims(axes = V_cache_9_axes_0, x = var_3069_cast_fp16)[name = string("V_cache_9_cast_fp16")]; tensor k_broadcast_9_reps_0 = const()[name = string("k_broadcast_9_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_9 = tile(reps = k_broadcast_9_reps_0, x = k_29)[name = string("k_broadcast_9")]; tensor v_broadcast_9_reps_0 = const()[name = string("v_broadcast_9_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_9 = transpose(perm = var_3040, x = var_3035)[name = string("transpose_124")]; tensor v_broadcast_9 = tile(reps = v_broadcast_9_reps_0, x = v_9)[name = string("v_broadcast_9")]; tensor var_3077_cast_fp16 = mul(x = K_cache_9_cast_fp16, y = var_1120_cast_fp16)[name = string("op_3077_cast_fp16")]; tensor var_3078_cast_fp16 = mul(x = k_broadcast_9, y = update_mask)[name = string("op_3078_cast_fp16")]; tensor K_new_9_cast_fp16 = add(x = var_3077_cast_fp16, y = var_3078_cast_fp16)[name = string("K_new_9_cast_fp16")]; tensor var_3084_cast_fp16 = mul(x = V_cache_9_cast_fp16, y = var_1120_cast_fp16)[name = string("op_3084_cast_fp16")]; tensor var_3085_cast_fp16 = mul(x = v_broadcast_9, y = update_mask)[name = string("op_3085_cast_fp16")]; tensor V_new_9_cast_fp16 = add(x = var_3084_cast_fp16, y = var_3085_cast_fp16)[name = string("V_new_9_cast_fp16")]; tensor var_3089_axes_0 = const()[name = string("op_3089_axes_0"), val = tensor([0])]; tensor var_3089_cast_fp16 = squeeze(axes = var_3089_axes_0, x = K_new_9_cast_fp16)[name = string("op_3089_cast_fp16")]; tensor concat_16 = const()[name = string("concat_16"), val = tensor([4, 0, 0, 0])]; tensor concat_17 = const()[name = string("concat_17"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_16, begin_mask = kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_17, end_mask = kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_9_stride_0, update = var_3089_cast_fp16, x = coreml_update_state_43)[name = string("kv_cache_0_internal_tensor_assign_9_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_9_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_44_write_state")]; tensor coreml_update_state_44 = read_state(input = kv_cache_0)[name = string("coreml_update_state_44")]; tensor var_3096_axes_0 = const()[name = string("op_3096_axes_0"), val = tensor([0])]; tensor var_3096_cast_fp16 = squeeze(axes = var_3096_axes_0, x = V_new_9_cast_fp16)[name = string("op_3096_cast_fp16")]; tensor concat_18 = const()[name = string("concat_18"), val = tensor([22, 0, 0, 0])]; tensor concat_19 = const()[name = string("concat_19"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_18, begin_mask = kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_19, end_mask = kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_10_stride_0, update = var_3096_cast_fp16, x = coreml_update_state_44)[name = string("kv_cache_0_internal_tensor_assign_10_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_10_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_45_write_state")]; tensor coreml_update_state_45 = read_state(input = kv_cache_0)[name = string("coreml_update_state_45")]; tensor hidden_states_35_axes_0 = const()[name = string("hidden_states_35_axes_0"), val = tensor([2])]; tensor hidden_states_35_cast_fp16 = expand_dims(axes = hidden_states_35_axes_0, x = K_new_9_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; tensor var_3109 = const()[name = string("op_3109"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_37_cast_fp16 = tile(reps = var_3109, x = hidden_states_35_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; tensor var_3115 = const()[name = string("op_3115"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_9_cast_fp16 = reshape(shape = var_3115, x = hidden_states_37_cast_fp16)[name = string("K_expanded_9_cast_fp16")]; tensor hidden_states_39_axes_0 = const()[name = string("hidden_states_39_axes_0"), val = tensor([2])]; tensor hidden_states_39_cast_fp16 = expand_dims(axes = hidden_states_39_axes_0, x = V_new_9_cast_fp16)[name = string("hidden_states_39_cast_fp16")]; tensor var_3124 = const()[name = string("op_3124"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_41_cast_fp16 = tile(reps = var_3124, x = hidden_states_39_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor var_3130 = const()[name = string("op_3130"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_9_cast_fp16 = reshape(shape = var_3130, x = hidden_states_41_cast_fp16)[name = string("V_expanded_9_cast_fp16")]; bool var_3145_transpose_x_1 = const()[name = string("op_3145_transpose_x_1"), val = bool(false)]; bool var_3145_transpose_y_1 = const()[name = string("op_3145_transpose_y_1"), val = bool(true)]; tensor var_3145_cast_fp16 = matmul(transpose_x = var_3145_transpose_x_1, transpose_y = var_3145_transpose_y_1, x = q_29, y = K_expanded_9_cast_fp16)[name = string("op_3145_cast_fp16")]; fp16 var_3146_to_fp16 = const()[name = string("op_3146_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_25_cast_fp16 = mul(x = var_3145_cast_fp16, y = var_3146_to_fp16)[name = string("attn_weights_25_cast_fp16")]; tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("attn_weights_27_cast_fp16")]; int32 var_3155 = const()[name = string("op_3155"), val = int32(-1)]; tensor var_3157_cast_fp16 = softmax(axis = var_3155, x = attn_weights_27_cast_fp16)[name = string("op_3157_cast_fp16")]; bool var_3173_transpose_x_0 = const()[name = string("op_3173_transpose_x_0"), val = bool(false)]; bool var_3173_transpose_y_0 = const()[name = string("op_3173_transpose_y_0"), val = bool(false)]; tensor var_3173_cast_fp16 = matmul(transpose_x = var_3173_transpose_x_0, transpose_y = var_3173_transpose_y_0, x = var_3157_cast_fp16, y = V_expanded_9_cast_fp16)[name = string("op_3173_cast_fp16")]; tensor var_3183 = const()[name = string("op_3183"), val = tensor([0, 2, 1, 3])]; tensor var_3190 = const()[name = string("op_3190"), val = tensor([1, 1, -1])]; tensor var_3184 = transpose(perm = var_3183, x = var_3173_cast_fp16)[name = string("transpose_123")]; tensor attn_output_27 = reshape(shape = var_3190, x = var_3184)[name = string("attn_output_27")]; tensor var_3195 = const()[name = string("op_3195"), val = tensor([0, 2, 1])]; tensor squeeze_4 = const()[name = string("squeeze_4"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526159168)))]; string var_3211_pad_type_0 = const()[name = string("op_3211_pad_type_0"), val = string("valid")]; int32 var_3211_groups_0 = const()[name = string("op_3211_groups_0"), val = int32(1)]; tensor var_3211_strides_0 = const()[name = string("op_3211_strides_0"), val = tensor([1])]; tensor var_3211_pad_0 = const()[name = string("op_3211_pad_0"), val = tensor([0, 0])]; tensor var_3211_dilations_0 = const()[name = string("op_3211_dilations_0"), val = tensor([1])]; tensor var_3196 = transpose(perm = var_3195, x = attn_output_27)[name = string("transpose_122")]; tensor var_3211 = conv(dilations = var_3211_dilations_0, groups = var_3211_groups_0, pad = var_3211_pad_0, pad_type = var_3211_pad_type_0, strides = var_3211_strides_0, weight = squeeze_4, x = var_3196)[name = string("op_3211")]; tensor var_3215 = const()[name = string("op_3215"), val = tensor([0, 2, 1])]; int32 var_3222 = const()[name = string("op_3222"), val = int32(-1)]; fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_73 = transpose(perm = var_3215, x = var_3211)[name = string("transpose_121")]; tensor var_3228_cast_fp16 = mul(x = x_73, y = const_64_promoted_to_fp16)[name = string("op_3228_cast_fp16")]; bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)]; tensor input_91_cast_fp16 = concat(axis = var_3222, interleave = input_91_interleave_0, values = (x_73, var_3228_cast_fp16))[name = string("input_91_cast_fp16")]; tensor normed_127_axes_0 = const()[name = string("normed_127_axes_0"), val = tensor([-1])]; fp16 var_3220_to_fp16 = const()[name = string("op_3220_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_127_cast_fp16 = layer_norm(axes = normed_127_axes_0, epsilon = var_3220_to_fp16, x = input_91_cast_fp16)[name = string("normed_127_cast_fp16")]; tensor var_3233_split_sizes_0 = const()[name = string("op_3233_split_sizes_0"), val = tensor([640, 640])]; int32 var_3233_axis_0 = const()[name = string("op_3233_axis_0"), val = int32(-1)]; tensor var_3233_cast_fp16_0, tensor var_3233_cast_fp16_1 = split(axis = var_3233_axis_0, split_sizes = var_3233_split_sizes_0, x = normed_127_cast_fp16)[name = string("op_3233_cast_fp16")]; tensor var_3237_to_fp16 = const()[name = string("op_3237_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(527469952)))]; tensor out_55_cast_fp16 = mul(x = var_3233_cast_fp16_0, y = var_3237_to_fp16)[name = string("out_55_cast_fp16")]; tensor x_75_cast_fp16 = add(x = x_65_cast_fp16, y = out_55_cast_fp16)[name = string("x_75_cast_fp16")]; int32 var_3251 = const()[name = string("op_3251"), val = int32(-1)]; fp16 const_66_promoted_to_fp16 = const()[name = string("const_66_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3257_cast_fp16 = mul(x = x_75_cast_fp16, y = const_66_promoted_to_fp16)[name = string("op_3257_cast_fp16")]; bool input_93_interleave_0 = const()[name = string("input_93_interleave_0"), val = bool(false)]; tensor input_93_cast_fp16 = concat(axis = var_3251, interleave = input_93_interleave_0, values = (x_75_cast_fp16, var_3257_cast_fp16))[name = string("input_93_cast_fp16")]; tensor normed_131_axes_0 = const()[name = string("normed_131_axes_0"), val = tensor([-1])]; fp16 var_3249_to_fp16 = const()[name = string("op_3249_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_131_cast_fp16 = layer_norm(axes = normed_131_axes_0, epsilon = var_3249_to_fp16, x = input_93_cast_fp16)[name = string("normed_131_cast_fp16")]; tensor var_3262_split_sizes_0 = const()[name = string("op_3262_split_sizes_0"), val = tensor([640, 640])]; int32 var_3262_axis_0 = const()[name = string("op_3262_axis_0"), val = int32(-1)]; tensor var_3262_cast_fp16_0, tensor var_3262_cast_fp16_1 = split(axis = var_3262_axis_0, split_sizes = var_3262_split_sizes_0, x = normed_131_cast_fp16)[name = string("op_3262_cast_fp16")]; tensor var_3266_to_fp16 = const()[name = string("op_3266_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(527471296)))]; tensor out_57_cast_fp16 = mul(x = var_3262_cast_fp16_0, y = var_3266_to_fp16)[name = string("out_57_cast_fp16")]; tensor var_3280 = const()[name = string("op_3280"), val = tensor([0, 2, 1])]; tensor input_95_axes_0 = const()[name = string("input_95_axes_0"), val = tensor([2])]; tensor var_3281 = transpose(perm = var_3280, x = out_57_cast_fp16)[name = string("transpose_120")]; tensor input_95 = expand_dims(axes = input_95_axes_0, x = var_3281)[name = string("input_95")]; string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")]; tensor gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor([1, 1])]; tensor gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor([1, 1])]; int32 gate_17_groups_0 = const()[name = string("gate_17_groups_0"), val = int32(1)]; tensor gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = layers_4_mlp_gate_proj_weight, x = input_95)[name = string("gate_17")]; string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")]; tensor up_9_strides_0 = const()[name = string("up_9_strides_0"), val = tensor([1, 1])]; tensor up_9_pad_0 = const()[name = string("up_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor([1, 1])]; int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)]; tensor up_9 = conv(dilations = up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = layers_4_mlp_up_proj_weight, x = input_95)[name = string("up_9")]; string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = string("gate_19")]; tensor input_97 = mul(x = gate_19, y = up_9)[name = string("input_97")]; string mlp_out_17_pad_type_0 = const()[name = string("mlp_out_17_pad_type_0"), val = string("valid")]; tensor mlp_out_17_strides_0 = const()[name = string("mlp_out_17_strides_0"), val = tensor([1, 1])]; tensor mlp_out_17_pad_0 = const()[name = string("mlp_out_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_17_dilations_0 = const()[name = string("mlp_out_17_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_17_groups_0 = const()[name = string("mlp_out_17_groups_0"), val = int32(1)]; tensor mlp_out_17 = conv(dilations = mlp_out_17_dilations_0, groups = mlp_out_17_groups_0, pad = mlp_out_17_pad_0, pad_type = mlp_out_17_pad_type_0, strides = mlp_out_17_strides_0, weight = layers_4_mlp_down_proj_weight, x = input_97)[name = string("mlp_out_17")]; tensor var_3321_axes_0 = const()[name = string("op_3321_axes_0"), val = tensor([2])]; tensor var_3321 = squeeze(axes = var_3321_axes_0, x = mlp_out_17)[name = string("op_3321")]; tensor var_3325 = const()[name = string("op_3325"), val = tensor([0, 2, 1])]; int32 var_3332 = const()[name = string("op_3332"), val = int32(-1)]; fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_79 = transpose(perm = var_3325, x = var_3321)[name = string("transpose_119")]; tensor var_3338_cast_fp16 = mul(x = x_79, y = const_68_promoted_to_fp16)[name = string("op_3338_cast_fp16")]; bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)]; tensor input_99_cast_fp16 = concat(axis = var_3332, interleave = input_99_interleave_0, values = (x_79, var_3338_cast_fp16))[name = string("input_99_cast_fp16")]; tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; fp16 var_3330_to_fp16 = const()[name = string("op_3330_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_3330_to_fp16, x = input_99_cast_fp16)[name = string("normed_137_cast_fp16")]; tensor var_3343_split_sizes_0 = const()[name = string("op_3343_split_sizes_0"), val = tensor([640, 640])]; int32 var_3343_axis_0 = const()[name = string("op_3343_axis_0"), val = int32(-1)]; tensor var_3343_cast_fp16_0, tensor var_3343_cast_fp16_1 = split(axis = var_3343_axis_0, split_sizes = var_3343_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_3343_cast_fp16")]; tensor var_3347_to_fp16 = const()[name = string("op_3347_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(527472640)))]; tensor out_59_cast_fp16 = mul(x = var_3343_cast_fp16_0, y = var_3347_to_fp16)[name = string("out_59_cast_fp16")]; tensor x_81_cast_fp16 = add(x = x_75_cast_fp16, y = out_59_cast_fp16)[name = string("x_81_cast_fp16")]; int32 var_3361 = const()[name = string("op_3361"), val = int32(-1)]; fp16 const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3367_cast_fp16 = mul(x = x_81_cast_fp16, y = const_70_promoted_to_fp16)[name = string("op_3367_cast_fp16")]; bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; tensor input_101_cast_fp16 = concat(axis = var_3361, interleave = input_101_interleave_0, values = (x_81_cast_fp16, var_3367_cast_fp16))[name = string("input_101_cast_fp16")]; tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; fp16 var_3359_to_fp16 = const()[name = string("op_3359_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_3359_to_fp16, x = input_101_cast_fp16)[name = string("normed_141_cast_fp16")]; tensor var_3372_split_sizes_0 = const()[name = string("op_3372_split_sizes_0"), val = tensor([640, 640])]; int32 var_3372_axis_0 = const()[name = string("op_3372_axis_0"), val = int32(-1)]; tensor var_3372_cast_fp16_0, tensor var_3372_cast_fp16_1 = split(axis = var_3372_axis_0, split_sizes = var_3372_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_3372_cast_fp16")]; tensor var_3376_to_fp16 = const()[name = string("op_3376_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(527473984)))]; tensor out_61_cast_fp16 = mul(x = var_3372_cast_fp16_0, y = var_3376_to_fp16)[name = string("out_61_cast_fp16")]; tensor var_3390 = const()[name = string("op_3390"), val = tensor([0, 2, 1])]; tensor input_103_axes_0 = const()[name = string("input_103_axes_0"), val = tensor([2])]; tensor var_3391 = transpose(perm = var_3390, x = out_61_cast_fp16)[name = string("transpose_118")]; tensor input_103 = expand_dims(axes = input_103_axes_0, x = var_3391)[name = string("input_103")]; string var_3404_pad_type_0 = const()[name = string("op_3404_pad_type_0"), val = string("valid")]; tensor var_3404_strides_0 = const()[name = string("op_3404_strides_0"), val = tensor([1, 1])]; tensor var_3404_pad_0 = const()[name = string("op_3404_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3404_dilations_0 = const()[name = string("op_3404_dilations_0"), val = tensor([1, 1])]; int32 var_3404_groups_0 = const()[name = string("op_3404_groups_0"), val = int32(1)]; tensor var_3404 = conv(dilations = var_3404_dilations_0, groups = var_3404_groups_0, pad = var_3404_pad_0, pad_type = var_3404_pad_type_0, strides = var_3404_strides_0, weight = layers_5_self_attn_q_proj_weight, x = input_103)[name = string("op_3404")]; tensor var_3409 = const()[name = string("op_3409"), val = tensor([1, 4, 256, 1])]; tensor var_3410 = reshape(shape = var_3409, x = var_3404)[name = string("op_3410")]; tensor var_3415 = const()[name = string("op_3415"), val = tensor([0, 1, 3, 2])]; tensor var_3420 = const()[name = string("op_3420"), val = tensor([1, 4, 256])]; tensor q_31 = transpose(perm = var_3415, x = var_3410)[name = string("transpose_117")]; tensor x_85 = reshape(shape = var_3420, x = q_31)[name = string("x_85")]; int32 var_3427 = const()[name = string("op_3427"), val = int32(-1)]; fp16 const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3433_cast_fp16 = mul(x = x_85, y = const_72_promoted_to_fp16)[name = string("op_3433_cast_fp16")]; bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; tensor input_105_cast_fp16 = concat(axis = var_3427, interleave = input_105_interleave_0, values = (x_85, var_3433_cast_fp16))[name = string("input_105_cast_fp16")]; tensor normed_147_axes_0 = const()[name = string("normed_147_axes_0"), val = tensor([-1])]; fp16 var_3425_to_fp16 = const()[name = string("op_3425_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_147_cast_fp16 = layer_norm(axes = normed_147_axes_0, epsilon = var_3425_to_fp16, x = input_105_cast_fp16)[name = string("normed_147_cast_fp16")]; tensor var_3438_split_sizes_0 = const()[name = string("op_3438_split_sizes_0"), val = tensor([256, 256])]; int32 var_3438_axis_0 = const()[name = string("op_3438_axis_0"), val = int32(-1)]; tensor var_3438_cast_fp16_0, tensor var_3438_cast_fp16_1 = split(axis = var_3438_axis_0, split_sizes = var_3438_split_sizes_0, x = normed_147_cast_fp16)[name = string("op_3438_cast_fp16")]; tensor var_3442_to_fp16 = const()[name = string("op_3442_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(527475328)))]; tensor out_63_cast_fp16 = mul(x = var_3438_cast_fp16_0, y = var_3442_to_fp16)[name = string("out_63_cast_fp16")]; tensor var_3449 = const()[name = string("op_3449"), val = tensor([1, 4, 1, 256])]; tensor q_33 = reshape(shape = var_3449, x = out_63_cast_fp16)[name = string("q_33")]; string var_3461_pad_type_0 = const()[name = string("op_3461_pad_type_0"), val = string("valid")]; tensor var_3461_strides_0 = const()[name = string("op_3461_strides_0"), val = tensor([1, 1])]; tensor var_3461_pad_0 = const()[name = string("op_3461_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3461_dilations_0 = const()[name = string("op_3461_dilations_0"), val = tensor([1, 1])]; int32 var_3461_groups_0 = const()[name = string("op_3461_groups_0"), val = int32(1)]; tensor var_3461 = conv(dilations = var_3461_dilations_0, groups = var_3461_groups_0, pad = var_3461_pad_0, pad_type = var_3461_pad_type_0, strides = var_3461_strides_0, weight = layers_5_self_attn_k_proj_weight, x = input_103)[name = string("op_3461")]; tensor var_3466 = const()[name = string("op_3466"), val = tensor([1, 1, 256, 1])]; tensor var_3467 = reshape(shape = var_3466, x = var_3461)[name = string("op_3467")]; tensor var_3472 = const()[name = string("op_3472"), val = tensor([0, 1, 3, 2])]; tensor var_3477 = const()[name = string("op_3477"), val = tensor([1, 1, 256])]; tensor k_31 = transpose(perm = var_3472, x = var_3467)[name = string("transpose_116")]; tensor x_87 = reshape(shape = var_3477, x = k_31)[name = string("x_87")]; int32 var_3484 = const()[name = string("op_3484"), val = int32(-1)]; fp16 const_74_promoted_to_fp16 = const()[name = string("const_74_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3490_cast_fp16 = mul(x = x_87, y = const_74_promoted_to_fp16)[name = string("op_3490_cast_fp16")]; bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; tensor input_107_cast_fp16 = concat(axis = var_3484, interleave = input_107_interleave_0, values = (x_87, var_3490_cast_fp16))[name = string("input_107_cast_fp16")]; tensor normed_151_axes_0 = const()[name = string("normed_151_axes_0"), val = tensor([-1])]; fp16 var_3482_to_fp16 = const()[name = string("op_3482_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_151_cast_fp16 = layer_norm(axes = normed_151_axes_0, epsilon = var_3482_to_fp16, x = input_107_cast_fp16)[name = string("normed_151_cast_fp16")]; tensor var_3495_split_sizes_0 = const()[name = string("op_3495_split_sizes_0"), val = tensor([256, 256])]; int32 var_3495_axis_0 = const()[name = string("op_3495_axis_0"), val = int32(-1)]; tensor var_3495_cast_fp16_0, tensor var_3495_cast_fp16_1 = split(axis = var_3495_axis_0, split_sizes = var_3495_split_sizes_0, x = normed_151_cast_fp16)[name = string("op_3495_cast_fp16")]; tensor var_3499_to_fp16 = const()[name = string("op_3499_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(527475904)))]; tensor out_65_cast_fp16 = mul(x = var_3495_cast_fp16_0, y = var_3499_to_fp16)[name = string("out_65_cast_fp16")]; tensor var_3506 = const()[name = string("op_3506"), val = tensor([1, 1, 1, 256])]; tensor k_33 = reshape(shape = var_3506, x = out_65_cast_fp16)[name = string("k_33")]; string var_3518_pad_type_0 = const()[name = string("op_3518_pad_type_0"), val = string("valid")]; tensor var_3518_strides_0 = const()[name = string("op_3518_strides_0"), val = tensor([1, 1])]; tensor var_3518_pad_0 = const()[name = string("op_3518_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3518_dilations_0 = const()[name = string("op_3518_dilations_0"), val = tensor([1, 1])]; int32 var_3518_groups_0 = const()[name = string("op_3518_groups_0"), val = int32(1)]; tensor var_3518 = conv(dilations = var_3518_dilations_0, groups = var_3518_groups_0, pad = var_3518_pad_0, pad_type = var_3518_pad_type_0, strides = var_3518_strides_0, weight = layers_5_self_attn_v_proj_weight, x = input_103)[name = string("op_3518")]; tensor var_3523 = const()[name = string("op_3523"), val = tensor([1, 1, 256, 1])]; tensor var_3524 = reshape(shape = var_3523, x = var_3518)[name = string("op_3524")]; tensor var_3529 = const()[name = string("op_3529"), val = tensor([0, 1, 3, 2])]; tensor var_3531 = mul(x = q_33, y = cos)[name = string("op_3531")]; tensor var_3532_split_sizes_0 = const()[name = string("op_3532_split_sizes_0"), val = tensor([128, 128])]; int32 var_3532_axis_0 = const()[name = string("op_3532_axis_0"), val = int32(-1)]; tensor var_3532_0, tensor var_3532_1 = split(axis = var_3532_axis_0, split_sizes = var_3532_split_sizes_0, x = q_33)[name = string("op_3532")]; fp16 const_76_promoted = const()[name = string("const_76_promoted"), val = fp16(-0x1p+0)]; tensor var_3534 = mul(x = var_3532_1, y = const_76_promoted)[name = string("op_3534")]; int32 var_3536 = const()[name = string("op_3536"), val = int32(-1)]; bool var_3537_interleave_0 = const()[name = string("op_3537_interleave_0"), val = bool(false)]; tensor var_3537 = concat(axis = var_3536, interleave = var_3537_interleave_0, values = (var_3534, var_3532_0))[name = string("op_3537")]; tensor var_3538 = mul(x = var_3537, y = sin)[name = string("op_3538")]; tensor q_35 = add(x = var_3531, y = var_3538)[name = string("q_35")]; tensor var_3541 = mul(x = k_33, y = cos)[name = string("op_3541")]; tensor var_3542_split_sizes_0 = const()[name = string("op_3542_split_sizes_0"), val = tensor([128, 128])]; int32 var_3542_axis_0 = const()[name = string("op_3542_axis_0"), val = int32(-1)]; tensor var_3542_0, tensor var_3542_1 = split(axis = var_3542_axis_0, split_sizes = var_3542_split_sizes_0, x = k_33)[name = string("op_3542")]; fp16 const_77_promoted = const()[name = string("const_77_promoted"), val = fp16(-0x1p+0)]; tensor var_3544 = mul(x = var_3542_1, y = const_77_promoted)[name = string("op_3544")]; int32 var_3546 = const()[name = string("op_3546"), val = int32(-1)]; bool var_3547_interleave_0 = const()[name = string("op_3547_interleave_0"), val = bool(false)]; tensor var_3547 = concat(axis = var_3546, interleave = var_3547_interleave_0, values = (var_3544, var_3542_0))[name = string("op_3547")]; tensor var_3548 = mul(x = var_3547, y = sin)[name = string("op_3548")]; tensor k_35 = add(x = var_3541, y = var_3548)[name = string("k_35")]; tensor var_3553_begin_0 = const()[name = string("op_3553_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_3553_end_0 = const()[name = string("op_3553_end_0"), val = tensor([6, 1, 2048, 256])]; tensor var_3553_end_mask_0 = const()[name = string("op_3553_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3553_squeeze_mask_0 = const()[name = string("op_3553_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_3553_cast_fp16 = slice_by_index(begin = var_3553_begin_0, end = var_3553_end_0, end_mask = var_3553_end_mask_0, squeeze_mask = var_3553_squeeze_mask_0, x = coreml_update_state_45)[name = string("op_3553_cast_fp16")]; tensor K_cache_11_axes_0 = const()[name = string("K_cache_11_axes_0"), val = tensor([0])]; tensor K_cache_11_cast_fp16 = expand_dims(axes = K_cache_11_axes_0, x = var_3553_cast_fp16)[name = string("K_cache_11_cast_fp16")]; tensor var_3558_begin_0 = const()[name = string("op_3558_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_3558_end_0 = const()[name = string("op_3558_end_0"), val = tensor([24, 1, 2048, 256])]; tensor var_3558_end_mask_0 = const()[name = string("op_3558_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3558_squeeze_mask_0 = const()[name = string("op_3558_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_3558_cast_fp16 = slice_by_index(begin = var_3558_begin_0, end = var_3558_end_0, end_mask = var_3558_end_mask_0, squeeze_mask = var_3558_squeeze_mask_0, x = coreml_update_state_45)[name = string("op_3558_cast_fp16")]; tensor V_cache_11_axes_0 = const()[name = string("V_cache_11_axes_0"), val = tensor([0])]; tensor V_cache_11_cast_fp16 = expand_dims(axes = V_cache_11_axes_0, x = var_3558_cast_fp16)[name = string("V_cache_11_cast_fp16")]; tensor k_broadcast_11_reps_0 = const()[name = string("k_broadcast_11_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_11 = tile(reps = k_broadcast_11_reps_0, x = k_35)[name = string("k_broadcast_11")]; tensor v_broadcast_11_reps_0 = const()[name = string("v_broadcast_11_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_11 = transpose(perm = var_3529, x = var_3524)[name = string("transpose_115")]; tensor v_broadcast_11 = tile(reps = v_broadcast_11_reps_0, x = v_11)[name = string("v_broadcast_11")]; tensor var_3566_cast_fp16 = mul(x = K_cache_11_cast_fp16, y = var_1120_cast_fp16)[name = string("op_3566_cast_fp16")]; tensor var_3567_cast_fp16 = mul(x = k_broadcast_11, y = update_mask)[name = string("op_3567_cast_fp16")]; tensor K_new_11_cast_fp16 = add(x = var_3566_cast_fp16, y = var_3567_cast_fp16)[name = string("K_new_11_cast_fp16")]; tensor var_3573_cast_fp16 = mul(x = V_cache_11_cast_fp16, y = var_1120_cast_fp16)[name = string("op_3573_cast_fp16")]; tensor var_3574_cast_fp16 = mul(x = v_broadcast_11, y = update_mask)[name = string("op_3574_cast_fp16")]; tensor V_new_11_cast_fp16 = add(x = var_3573_cast_fp16, y = var_3574_cast_fp16)[name = string("V_new_11_cast_fp16")]; tensor var_3578_axes_0 = const()[name = string("op_3578_axes_0"), val = tensor([0])]; tensor var_3578_cast_fp16 = squeeze(axes = var_3578_axes_0, x = K_new_11_cast_fp16)[name = string("op_3578_cast_fp16")]; tensor concat_20 = const()[name = string("concat_20"), val = tensor([5, 0, 0, 0])]; tensor concat_21 = const()[name = string("concat_21"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_20, begin_mask = kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_21, end_mask = kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_11_stride_0, update = var_3578_cast_fp16, x = coreml_update_state_45)[name = string("kv_cache_0_internal_tensor_assign_11_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_11_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_46_write_state")]; tensor coreml_update_state_46 = read_state(input = kv_cache_0)[name = string("coreml_update_state_46")]; tensor var_3585_axes_0 = const()[name = string("op_3585_axes_0"), val = tensor([0])]; tensor var_3585_cast_fp16 = squeeze(axes = var_3585_axes_0, x = V_new_11_cast_fp16)[name = string("op_3585_cast_fp16")]; tensor concat_22 = const()[name = string("concat_22"), val = tensor([23, 0, 0, 0])]; tensor concat_23 = const()[name = string("concat_23"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_22, begin_mask = kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_23, end_mask = kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_12_stride_0, update = var_3585_cast_fp16, x = coreml_update_state_46)[name = string("kv_cache_0_internal_tensor_assign_12_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_12_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_47_write_state")]; tensor coreml_update_state_47 = read_state(input = kv_cache_0)[name = string("coreml_update_state_47")]; tensor hidden_states_43_axes_0 = const()[name = string("hidden_states_43_axes_0"), val = tensor([2])]; tensor hidden_states_43_cast_fp16 = expand_dims(axes = hidden_states_43_axes_0, x = K_new_11_cast_fp16)[name = string("hidden_states_43_cast_fp16")]; tensor var_3598 = const()[name = string("op_3598"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_45_cast_fp16 = tile(reps = var_3598, x = hidden_states_43_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; tensor var_3604 = const()[name = string("op_3604"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_11_cast_fp16 = reshape(shape = var_3604, x = hidden_states_45_cast_fp16)[name = string("K_expanded_11_cast_fp16")]; tensor hidden_states_47_axes_0 = const()[name = string("hidden_states_47_axes_0"), val = tensor([2])]; tensor hidden_states_47_cast_fp16 = expand_dims(axes = hidden_states_47_axes_0, x = V_new_11_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; tensor var_3613 = const()[name = string("op_3613"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_49_cast_fp16 = tile(reps = var_3613, x = hidden_states_47_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; tensor var_3619 = const()[name = string("op_3619"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_11_cast_fp16 = reshape(shape = var_3619, x = hidden_states_49_cast_fp16)[name = string("V_expanded_11_cast_fp16")]; bool var_3634_transpose_x_1 = const()[name = string("op_3634_transpose_x_1"), val = bool(false)]; bool var_3634_transpose_y_1 = const()[name = string("op_3634_transpose_y_1"), val = bool(true)]; tensor var_3634_cast_fp16 = matmul(transpose_x = var_3634_transpose_x_1, transpose_y = var_3634_transpose_y_1, x = q_35, y = K_expanded_11_cast_fp16)[name = string("op_3634_cast_fp16")]; fp16 var_3635_to_fp16 = const()[name = string("op_3635_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_31_cast_fp16 = mul(x = var_3634_cast_fp16, y = var_3635_to_fp16)[name = string("attn_weights_31_cast_fp16")]; tensor attn_weights_33_cast_fp16 = add(x = attn_weights_31_cast_fp16, y = causal_mask)[name = string("attn_weights_33_cast_fp16")]; int32 var_3644 = const()[name = string("op_3644"), val = int32(-1)]; tensor var_3646_cast_fp16 = softmax(axis = var_3644, x = attn_weights_33_cast_fp16)[name = string("op_3646_cast_fp16")]; bool var_3662_transpose_x_0 = const()[name = string("op_3662_transpose_x_0"), val = bool(false)]; bool var_3662_transpose_y_0 = const()[name = string("op_3662_transpose_y_0"), val = bool(false)]; tensor var_3662_cast_fp16 = matmul(transpose_x = var_3662_transpose_x_0, transpose_y = var_3662_transpose_y_0, x = var_3646_cast_fp16, y = V_expanded_11_cast_fp16)[name = string("op_3662_cast_fp16")]; tensor var_3672 = const()[name = string("op_3672"), val = tensor([0, 2, 1, 3])]; tensor var_3679 = const()[name = string("op_3679"), val = tensor([1, 1, -1])]; tensor var_3673 = transpose(perm = var_3672, x = var_3662_cast_fp16)[name = string("transpose_114")]; tensor attn_output_33 = reshape(shape = var_3679, x = var_3673)[name = string("attn_output_33")]; tensor var_3684 = const()[name = string("op_3684"), val = tensor([0, 2, 1])]; tensor squeeze_5 = const()[name = string("squeeze_5"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(527476480)))]; string var_3700_pad_type_0 = const()[name = string("op_3700_pad_type_0"), val = string("valid")]; int32 var_3700_groups_0 = const()[name = string("op_3700_groups_0"), val = int32(1)]; tensor var_3700_strides_0 = const()[name = string("op_3700_strides_0"), val = tensor([1])]; tensor var_3700_pad_0 = const()[name = string("op_3700_pad_0"), val = tensor([0, 0])]; tensor var_3700_dilations_0 = const()[name = string("op_3700_dilations_0"), val = tensor([1])]; tensor var_3685 = transpose(perm = var_3684, x = attn_output_33)[name = string("transpose_113")]; tensor var_3700 = conv(dilations = var_3700_dilations_0, groups = var_3700_groups_0, pad = var_3700_pad_0, pad_type = var_3700_pad_type_0, strides = var_3700_strides_0, weight = squeeze_5, x = var_3685)[name = string("op_3700")]; tensor var_3704 = const()[name = string("op_3704"), val = tensor([0, 2, 1])]; int32 var_3711 = const()[name = string("op_3711"), val = int32(-1)]; fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_89 = transpose(perm = var_3704, x = var_3700)[name = string("transpose_112")]; tensor var_3717_cast_fp16 = mul(x = x_89, y = const_78_promoted_to_fp16)[name = string("op_3717_cast_fp16")]; bool input_111_interleave_0 = const()[name = string("input_111_interleave_0"), val = bool(false)]; tensor input_111_cast_fp16 = concat(axis = var_3711, interleave = input_111_interleave_0, values = (x_89, var_3717_cast_fp16))[name = string("input_111_cast_fp16")]; tensor normed_155_axes_0 = const()[name = string("normed_155_axes_0"), val = tensor([-1])]; fp16 var_3709_to_fp16 = const()[name = string("op_3709_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_155_cast_fp16 = layer_norm(axes = normed_155_axes_0, epsilon = var_3709_to_fp16, x = input_111_cast_fp16)[name = string("normed_155_cast_fp16")]; tensor var_3722_split_sizes_0 = const()[name = string("op_3722_split_sizes_0"), val = tensor([640, 640])]; int32 var_3722_axis_0 = const()[name = string("op_3722_axis_0"), val = int32(-1)]; tensor var_3722_cast_fp16_0, tensor var_3722_cast_fp16_1 = split(axis = var_3722_axis_0, split_sizes = var_3722_split_sizes_0, x = normed_155_cast_fp16)[name = string("op_3722_cast_fp16")]; tensor var_3726_to_fp16 = const()[name = string("op_3726_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528787264)))]; tensor out_67_cast_fp16 = mul(x = var_3722_cast_fp16_0, y = var_3726_to_fp16)[name = string("out_67_cast_fp16")]; tensor x_91_cast_fp16 = add(x = x_81_cast_fp16, y = out_67_cast_fp16)[name = string("x_91_cast_fp16")]; int32 var_3740 = const()[name = string("op_3740"), val = int32(-1)]; fp16 const_80_promoted_to_fp16 = const()[name = string("const_80_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3746_cast_fp16 = mul(x = x_91_cast_fp16, y = const_80_promoted_to_fp16)[name = string("op_3746_cast_fp16")]; bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; tensor input_113_cast_fp16 = concat(axis = var_3740, interleave = input_113_interleave_0, values = (x_91_cast_fp16, var_3746_cast_fp16))[name = string("input_113_cast_fp16")]; tensor normed_159_axes_0 = const()[name = string("normed_159_axes_0"), val = tensor([-1])]; fp16 var_3738_to_fp16 = const()[name = string("op_3738_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_159_cast_fp16 = layer_norm(axes = normed_159_axes_0, epsilon = var_3738_to_fp16, x = input_113_cast_fp16)[name = string("normed_159_cast_fp16")]; tensor var_3751_split_sizes_0 = const()[name = string("op_3751_split_sizes_0"), val = tensor([640, 640])]; int32 var_3751_axis_0 = const()[name = string("op_3751_axis_0"), val = int32(-1)]; tensor var_3751_cast_fp16_0, tensor var_3751_cast_fp16_1 = split(axis = var_3751_axis_0, split_sizes = var_3751_split_sizes_0, x = normed_159_cast_fp16)[name = string("op_3751_cast_fp16")]; tensor var_3755_to_fp16 = const()[name = string("op_3755_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528788608)))]; tensor out_69_cast_fp16 = mul(x = var_3751_cast_fp16_0, y = var_3755_to_fp16)[name = string("out_69_cast_fp16")]; tensor var_3769 = const()[name = string("op_3769"), val = tensor([0, 2, 1])]; tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; tensor var_3770 = transpose(perm = var_3769, x = out_69_cast_fp16)[name = string("transpose_111")]; tensor input_115 = expand_dims(axes = input_115_axes_0, x = var_3770)[name = string("input_115")]; string gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")]; tensor gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor([1, 1])]; tensor gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor([1, 1])]; int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)]; tensor gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, strides = gate_21_strides_0, weight = layers_5_mlp_gate_proj_weight, x = input_115)[name = string("gate_21")]; string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")]; tensor up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor([1, 1])]; tensor up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor([1, 1])]; int32 up_11_groups_0 = const()[name = string("up_11_groups_0"), val = int32(1)]; tensor up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = layers_5_mlp_up_proj_weight, x = input_115)[name = string("up_11")]; string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")]; tensor input_117 = mul(x = gate_23, y = up_11)[name = string("input_117")]; string mlp_out_21_pad_type_0 = const()[name = string("mlp_out_21_pad_type_0"), val = string("valid")]; tensor mlp_out_21_strides_0 = const()[name = string("mlp_out_21_strides_0"), val = tensor([1, 1])]; tensor mlp_out_21_pad_0 = const()[name = string("mlp_out_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_21_dilations_0 = const()[name = string("mlp_out_21_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_21_groups_0 = const()[name = string("mlp_out_21_groups_0"), val = int32(1)]; tensor mlp_out_21 = conv(dilations = mlp_out_21_dilations_0, groups = mlp_out_21_groups_0, pad = mlp_out_21_pad_0, pad_type = mlp_out_21_pad_type_0, strides = mlp_out_21_strides_0, weight = layers_5_mlp_down_proj_weight, x = input_117)[name = string("mlp_out_21")]; tensor var_3810_axes_0 = const()[name = string("op_3810_axes_0"), val = tensor([2])]; tensor var_3810 = squeeze(axes = var_3810_axes_0, x = mlp_out_21)[name = string("op_3810")]; tensor var_3814 = const()[name = string("op_3814"), val = tensor([0, 2, 1])]; int32 var_3821 = const()[name = string("op_3821"), val = int32(-1)]; fp16 const_82_promoted_to_fp16 = const()[name = string("const_82_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_95 = transpose(perm = var_3814, x = var_3810)[name = string("transpose_110")]; tensor var_3827_cast_fp16 = mul(x = x_95, y = const_82_promoted_to_fp16)[name = string("op_3827_cast_fp16")]; bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; tensor input_119_cast_fp16 = concat(axis = var_3821, interleave = input_119_interleave_0, values = (x_95, var_3827_cast_fp16))[name = string("input_119_cast_fp16")]; tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; fp16 var_3819_to_fp16 = const()[name = string("op_3819_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_3819_to_fp16, x = input_119_cast_fp16)[name = string("normed_165_cast_fp16")]; tensor var_3832_split_sizes_0 = const()[name = string("op_3832_split_sizes_0"), val = tensor([640, 640])]; int32 var_3832_axis_0 = const()[name = string("op_3832_axis_0"), val = int32(-1)]; tensor var_3832_cast_fp16_0, tensor var_3832_cast_fp16_1 = split(axis = var_3832_axis_0, split_sizes = var_3832_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_3832_cast_fp16")]; tensor var_3836_to_fp16 = const()[name = string("op_3836_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528789952)))]; tensor out_71_cast_fp16 = mul(x = var_3832_cast_fp16_0, y = var_3836_to_fp16)[name = string("out_71_cast_fp16")]; tensor x_97_cast_fp16 = add(x = x_91_cast_fp16, y = out_71_cast_fp16)[name = string("x_97_cast_fp16")]; int32 var_3850 = const()[name = string("op_3850"), val = int32(-1)]; fp16 const_84_promoted_to_fp16 = const()[name = string("const_84_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3856_cast_fp16 = mul(x = x_97_cast_fp16, y = const_84_promoted_to_fp16)[name = string("op_3856_cast_fp16")]; bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; tensor input_121_cast_fp16 = concat(axis = var_3850, interleave = input_121_interleave_0, values = (x_97_cast_fp16, var_3856_cast_fp16))[name = string("input_121_cast_fp16")]; tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; fp16 var_3848_to_fp16 = const()[name = string("op_3848_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_3848_to_fp16, x = input_121_cast_fp16)[name = string("normed_169_cast_fp16")]; tensor var_3861_split_sizes_0 = const()[name = string("op_3861_split_sizes_0"), val = tensor([640, 640])]; int32 var_3861_axis_0 = const()[name = string("op_3861_axis_0"), val = int32(-1)]; tensor var_3861_cast_fp16_0, tensor var_3861_cast_fp16_1 = split(axis = var_3861_axis_0, split_sizes = var_3861_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_3861_cast_fp16")]; tensor var_3865_to_fp16 = const()[name = string("op_3865_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528791296)))]; tensor out_73_cast_fp16 = mul(x = var_3861_cast_fp16_0, y = var_3865_to_fp16)[name = string("out_73_cast_fp16")]; tensor var_3879 = const()[name = string("op_3879"), val = tensor([0, 2, 1])]; tensor input_123_axes_0 = const()[name = string("input_123_axes_0"), val = tensor([2])]; tensor var_3880 = transpose(perm = var_3879, x = out_73_cast_fp16)[name = string("transpose_109")]; tensor input_123 = expand_dims(axes = input_123_axes_0, x = var_3880)[name = string("input_123")]; string var_3893_pad_type_0 = const()[name = string("op_3893_pad_type_0"), val = string("valid")]; tensor var_3893_strides_0 = const()[name = string("op_3893_strides_0"), val = tensor([1, 1])]; tensor var_3893_pad_0 = const()[name = string("op_3893_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3893_dilations_0 = const()[name = string("op_3893_dilations_0"), val = tensor([1, 1])]; int32 var_3893_groups_0 = const()[name = string("op_3893_groups_0"), val = int32(1)]; tensor var_3893 = conv(dilations = var_3893_dilations_0, groups = var_3893_groups_0, pad = var_3893_pad_0, pad_type = var_3893_pad_type_0, strides = var_3893_strides_0, weight = layers_6_self_attn_q_proj_weight, x = input_123)[name = string("op_3893")]; tensor var_3898 = const()[name = string("op_3898"), val = tensor([1, 4, 256, 1])]; tensor var_3899 = reshape(shape = var_3898, x = var_3893)[name = string("op_3899")]; tensor var_3904 = const()[name = string("op_3904"), val = tensor([0, 1, 3, 2])]; tensor var_3909 = const()[name = string("op_3909"), val = tensor([1, 4, 256])]; tensor q_37 = transpose(perm = var_3904, x = var_3899)[name = string("transpose_108")]; tensor x_101 = reshape(shape = var_3909, x = q_37)[name = string("x_101")]; int32 var_3916 = const()[name = string("op_3916"), val = int32(-1)]; fp16 const_86_promoted_to_fp16 = const()[name = string("const_86_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3922_cast_fp16 = mul(x = x_101, y = const_86_promoted_to_fp16)[name = string("op_3922_cast_fp16")]; bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; tensor input_125_cast_fp16 = concat(axis = var_3916, interleave = input_125_interleave_0, values = (x_101, var_3922_cast_fp16))[name = string("input_125_cast_fp16")]; tensor normed_175_axes_0 = const()[name = string("normed_175_axes_0"), val = tensor([-1])]; fp16 var_3914_to_fp16 = const()[name = string("op_3914_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_175_cast_fp16 = layer_norm(axes = normed_175_axes_0, epsilon = var_3914_to_fp16, x = input_125_cast_fp16)[name = string("normed_175_cast_fp16")]; tensor var_3927_split_sizes_0 = const()[name = string("op_3927_split_sizes_0"), val = tensor([256, 256])]; int32 var_3927_axis_0 = const()[name = string("op_3927_axis_0"), val = int32(-1)]; tensor var_3927_cast_fp16_0, tensor var_3927_cast_fp16_1 = split(axis = var_3927_axis_0, split_sizes = var_3927_split_sizes_0, x = normed_175_cast_fp16)[name = string("op_3927_cast_fp16")]; tensor var_3931_to_fp16 = const()[name = string("op_3931_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528792640)))]; tensor out_75_cast_fp16 = mul(x = var_3927_cast_fp16_0, y = var_3931_to_fp16)[name = string("out_75_cast_fp16")]; tensor var_3938 = const()[name = string("op_3938"), val = tensor([1, 4, 1, 256])]; tensor q_39 = reshape(shape = var_3938, x = out_75_cast_fp16)[name = string("q_39")]; string var_3950_pad_type_0 = const()[name = string("op_3950_pad_type_0"), val = string("valid")]; tensor var_3950_strides_0 = const()[name = string("op_3950_strides_0"), val = tensor([1, 1])]; tensor var_3950_pad_0 = const()[name = string("op_3950_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3950_dilations_0 = const()[name = string("op_3950_dilations_0"), val = tensor([1, 1])]; int32 var_3950_groups_0 = const()[name = string("op_3950_groups_0"), val = int32(1)]; tensor var_3950 = conv(dilations = var_3950_dilations_0, groups = var_3950_groups_0, pad = var_3950_pad_0, pad_type = var_3950_pad_type_0, strides = var_3950_strides_0, weight = layers_6_self_attn_k_proj_weight, x = input_123)[name = string("op_3950")]; tensor var_3955 = const()[name = string("op_3955"), val = tensor([1, 1, 256, 1])]; tensor var_3956 = reshape(shape = var_3955, x = var_3950)[name = string("op_3956")]; tensor var_3961 = const()[name = string("op_3961"), val = tensor([0, 1, 3, 2])]; tensor var_3966 = const()[name = string("op_3966"), val = tensor([1, 1, 256])]; tensor k_37 = transpose(perm = var_3961, x = var_3956)[name = string("transpose_107")]; tensor x_103 = reshape(shape = var_3966, x = k_37)[name = string("x_103")]; int32 var_3973 = const()[name = string("op_3973"), val = int32(-1)]; fp16 const_88_promoted_to_fp16 = const()[name = string("const_88_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3979_cast_fp16 = mul(x = x_103, y = const_88_promoted_to_fp16)[name = string("op_3979_cast_fp16")]; bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; tensor input_127_cast_fp16 = concat(axis = var_3973, interleave = input_127_interleave_0, values = (x_103, var_3979_cast_fp16))[name = string("input_127_cast_fp16")]; tensor normed_179_axes_0 = const()[name = string("normed_179_axes_0"), val = tensor([-1])]; fp16 var_3971_to_fp16 = const()[name = string("op_3971_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_179_cast_fp16 = layer_norm(axes = normed_179_axes_0, epsilon = var_3971_to_fp16, x = input_127_cast_fp16)[name = string("normed_179_cast_fp16")]; tensor var_3984_split_sizes_0 = const()[name = string("op_3984_split_sizes_0"), val = tensor([256, 256])]; int32 var_3984_axis_0 = const()[name = string("op_3984_axis_0"), val = int32(-1)]; tensor var_3984_cast_fp16_0, tensor var_3984_cast_fp16_1 = split(axis = var_3984_axis_0, split_sizes = var_3984_split_sizes_0, x = normed_179_cast_fp16)[name = string("op_3984_cast_fp16")]; tensor var_3988_to_fp16 = const()[name = string("op_3988_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528793216)))]; tensor out_77_cast_fp16 = mul(x = var_3984_cast_fp16_0, y = var_3988_to_fp16)[name = string("out_77_cast_fp16")]; tensor var_3995 = const()[name = string("op_3995"), val = tensor([1, 1, 1, 256])]; tensor k_39 = reshape(shape = var_3995, x = out_77_cast_fp16)[name = string("k_39")]; string var_4007_pad_type_0 = const()[name = string("op_4007_pad_type_0"), val = string("valid")]; tensor var_4007_strides_0 = const()[name = string("op_4007_strides_0"), val = tensor([1, 1])]; tensor var_4007_pad_0 = const()[name = string("op_4007_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4007_dilations_0 = const()[name = string("op_4007_dilations_0"), val = tensor([1, 1])]; int32 var_4007_groups_0 = const()[name = string("op_4007_groups_0"), val = int32(1)]; tensor var_4007 = conv(dilations = var_4007_dilations_0, groups = var_4007_groups_0, pad = var_4007_pad_0, pad_type = var_4007_pad_type_0, strides = var_4007_strides_0, weight = layers_6_self_attn_v_proj_weight, x = input_123)[name = string("op_4007")]; tensor var_4012 = const()[name = string("op_4012"), val = tensor([1, 1, 256, 1])]; tensor var_4013 = reshape(shape = var_4012, x = var_4007)[name = string("op_4013")]; tensor var_4018 = const()[name = string("op_4018"), val = tensor([0, 1, 3, 2])]; tensor var_4020 = mul(x = q_39, y = cos_1)[name = string("op_4020")]; tensor var_4021_split_sizes_0 = const()[name = string("op_4021_split_sizes_0"), val = tensor([128, 128])]; int32 var_4021_axis_0 = const()[name = string("op_4021_axis_0"), val = int32(-1)]; tensor var_4021_0, tensor var_4021_1 = split(axis = var_4021_axis_0, split_sizes = var_4021_split_sizes_0, x = q_39)[name = string("op_4021")]; fp16 const_90_promoted = const()[name = string("const_90_promoted"), val = fp16(-0x1p+0)]; tensor var_4023 = mul(x = var_4021_1, y = const_90_promoted)[name = string("op_4023")]; int32 var_4025 = const()[name = string("op_4025"), val = int32(-1)]; bool var_4026_interleave_0 = const()[name = string("op_4026_interleave_0"), val = bool(false)]; tensor var_4026 = concat(axis = var_4025, interleave = var_4026_interleave_0, values = (var_4023, var_4021_0))[name = string("op_4026")]; tensor var_4027 = mul(x = var_4026, y = sin_1)[name = string("op_4027")]; tensor q_41 = add(x = var_4020, y = var_4027)[name = string("q_41")]; tensor var_4030 = mul(x = k_39, y = cos_1)[name = string("op_4030")]; tensor var_4031_split_sizes_0 = const()[name = string("op_4031_split_sizes_0"), val = tensor([128, 128])]; int32 var_4031_axis_0 = const()[name = string("op_4031_axis_0"), val = int32(-1)]; tensor var_4031_0, tensor var_4031_1 = split(axis = var_4031_axis_0, split_sizes = var_4031_split_sizes_0, x = k_39)[name = string("op_4031")]; fp16 const_91_promoted = const()[name = string("const_91_promoted"), val = fp16(-0x1p+0)]; tensor var_4033 = mul(x = var_4031_1, y = const_91_promoted)[name = string("op_4033")]; int32 var_4035 = const()[name = string("op_4035"), val = int32(-1)]; bool var_4036_interleave_0 = const()[name = string("op_4036_interleave_0"), val = bool(false)]; tensor var_4036 = concat(axis = var_4035, interleave = var_4036_interleave_0, values = (var_4033, var_4031_0))[name = string("op_4036")]; tensor var_4037 = mul(x = var_4036, y = sin_1)[name = string("op_4037")]; tensor k_41 = add(x = var_4030, y = var_4037)[name = string("k_41")]; tensor var_4042_begin_0 = const()[name = string("op_4042_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_4042_end_0 = const()[name = string("op_4042_end_0"), val = tensor([7, 1, 2048, 256])]; tensor var_4042_end_mask_0 = const()[name = string("op_4042_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4042_squeeze_mask_0 = const()[name = string("op_4042_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_4042_cast_fp16 = slice_by_index(begin = var_4042_begin_0, end = var_4042_end_0, end_mask = var_4042_end_mask_0, squeeze_mask = var_4042_squeeze_mask_0, x = coreml_update_state_47)[name = string("op_4042_cast_fp16")]; tensor K_cache_13_axes_0 = const()[name = string("K_cache_13_axes_0"), val = tensor([0])]; tensor K_cache_13_cast_fp16 = expand_dims(axes = K_cache_13_axes_0, x = var_4042_cast_fp16)[name = string("K_cache_13_cast_fp16")]; tensor var_4047_begin_0 = const()[name = string("op_4047_begin_0"), val = tensor([24, 0, 0, 0])]; tensor var_4047_end_0 = const()[name = string("op_4047_end_0"), val = tensor([25, 1, 2048, 256])]; tensor var_4047_end_mask_0 = const()[name = string("op_4047_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4047_squeeze_mask_0 = const()[name = string("op_4047_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_4047_cast_fp16 = slice_by_index(begin = var_4047_begin_0, end = var_4047_end_0, end_mask = var_4047_end_mask_0, squeeze_mask = var_4047_squeeze_mask_0, x = coreml_update_state_47)[name = string("op_4047_cast_fp16")]; tensor V_cache_13_axes_0 = const()[name = string("V_cache_13_axes_0"), val = tensor([0])]; tensor V_cache_13_cast_fp16 = expand_dims(axes = V_cache_13_axes_0, x = var_4047_cast_fp16)[name = string("V_cache_13_cast_fp16")]; tensor k_broadcast_13_reps_0 = const()[name = string("k_broadcast_13_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_13 = tile(reps = k_broadcast_13_reps_0, x = k_41)[name = string("k_broadcast_13")]; tensor v_broadcast_13_reps_0 = const()[name = string("v_broadcast_13_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_13 = transpose(perm = var_4018, x = var_4013)[name = string("transpose_106")]; tensor v_broadcast_13 = tile(reps = v_broadcast_13_reps_0, x = v_13)[name = string("v_broadcast_13")]; tensor var_4055_cast_fp16 = mul(x = K_cache_13_cast_fp16, y = var_1120_cast_fp16)[name = string("op_4055_cast_fp16")]; tensor var_4056_cast_fp16 = mul(x = k_broadcast_13, y = update_mask)[name = string("op_4056_cast_fp16")]; tensor K_new_13_cast_fp16 = add(x = var_4055_cast_fp16, y = var_4056_cast_fp16)[name = string("K_new_13_cast_fp16")]; tensor var_4062_cast_fp16 = mul(x = V_cache_13_cast_fp16, y = var_1120_cast_fp16)[name = string("op_4062_cast_fp16")]; tensor var_4063_cast_fp16 = mul(x = v_broadcast_13, y = update_mask)[name = string("op_4063_cast_fp16")]; tensor V_new_13_cast_fp16 = add(x = var_4062_cast_fp16, y = var_4063_cast_fp16)[name = string("V_new_13_cast_fp16")]; tensor var_4067_axes_0 = const()[name = string("op_4067_axes_0"), val = tensor([0])]; tensor var_4067_cast_fp16 = squeeze(axes = var_4067_axes_0, x = K_new_13_cast_fp16)[name = string("op_4067_cast_fp16")]; tensor concat_24 = const()[name = string("concat_24"), val = tensor([6, 0, 0, 0])]; tensor concat_25 = const()[name = string("concat_25"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_24, begin_mask = kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_25, end_mask = kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_13_stride_0, update = var_4067_cast_fp16, x = coreml_update_state_47)[name = string("kv_cache_0_internal_tensor_assign_13_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_13_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_48_write_state")]; tensor coreml_update_state_48 = read_state(input = kv_cache_0)[name = string("coreml_update_state_48")]; tensor var_4074_axes_0 = const()[name = string("op_4074_axes_0"), val = tensor([0])]; tensor var_4074_cast_fp16 = squeeze(axes = var_4074_axes_0, x = V_new_13_cast_fp16)[name = string("op_4074_cast_fp16")]; tensor concat_26 = const()[name = string("concat_26"), val = tensor([24, 0, 0, 0])]; tensor concat_27 = const()[name = string("concat_27"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_26, begin_mask = kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_27, end_mask = kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_14_stride_0, update = var_4074_cast_fp16, x = coreml_update_state_48)[name = string("kv_cache_0_internal_tensor_assign_14_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_14_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_49_write_state")]; tensor coreml_update_state_49 = read_state(input = kv_cache_0)[name = string("coreml_update_state_49")]; tensor hidden_states_51_axes_0 = const()[name = string("hidden_states_51_axes_0"), val = tensor([2])]; tensor hidden_states_51_cast_fp16 = expand_dims(axes = hidden_states_51_axes_0, x = K_new_13_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; tensor var_4087 = const()[name = string("op_4087"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_53_cast_fp16 = tile(reps = var_4087, x = hidden_states_51_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor var_4093 = const()[name = string("op_4093"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_13_cast_fp16 = reshape(shape = var_4093, x = hidden_states_53_cast_fp16)[name = string("K_expanded_13_cast_fp16")]; tensor hidden_states_55_axes_0 = const()[name = string("hidden_states_55_axes_0"), val = tensor([2])]; tensor hidden_states_55_cast_fp16 = expand_dims(axes = hidden_states_55_axes_0, x = V_new_13_cast_fp16)[name = string("hidden_states_55_cast_fp16")]; tensor var_4102 = const()[name = string("op_4102"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_57_cast_fp16 = tile(reps = var_4102, x = hidden_states_55_cast_fp16)[name = string("hidden_states_57_cast_fp16")]; tensor var_4108 = const()[name = string("op_4108"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_13_cast_fp16 = reshape(shape = var_4108, x = hidden_states_57_cast_fp16)[name = string("V_expanded_13_cast_fp16")]; bool var_4123_transpose_x_1 = const()[name = string("op_4123_transpose_x_1"), val = bool(false)]; bool var_4123_transpose_y_1 = const()[name = string("op_4123_transpose_y_1"), val = bool(true)]; tensor var_4123_cast_fp16 = matmul(transpose_x = var_4123_transpose_x_1, transpose_y = var_4123_transpose_y_1, x = q_41, y = K_expanded_13_cast_fp16)[name = string("op_4123_cast_fp16")]; fp16 var_4124_to_fp16 = const()[name = string("op_4124_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_37_cast_fp16 = mul(x = var_4123_cast_fp16, y = var_4124_to_fp16)[name = string("attn_weights_37_cast_fp16")]; tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask)[name = string("attn_weights_39_cast_fp16")]; int32 var_4133 = const()[name = string("op_4133"), val = int32(-1)]; tensor var_4135_cast_fp16 = softmax(axis = var_4133, x = attn_weights_39_cast_fp16)[name = string("op_4135_cast_fp16")]; bool var_4151_transpose_x_0 = const()[name = string("op_4151_transpose_x_0"), val = bool(false)]; bool var_4151_transpose_y_0 = const()[name = string("op_4151_transpose_y_0"), val = bool(false)]; tensor var_4151_cast_fp16 = matmul(transpose_x = var_4151_transpose_x_0, transpose_y = var_4151_transpose_y_0, x = var_4135_cast_fp16, y = V_expanded_13_cast_fp16)[name = string("op_4151_cast_fp16")]; tensor var_4161 = const()[name = string("op_4161"), val = tensor([0, 2, 1, 3])]; tensor var_4168 = const()[name = string("op_4168"), val = tensor([1, 1, -1])]; tensor var_4162 = transpose(perm = var_4161, x = var_4151_cast_fp16)[name = string("transpose_105")]; tensor attn_output_39 = reshape(shape = var_4168, x = var_4162)[name = string("attn_output_39")]; tensor var_4173 = const()[name = string("op_4173"), val = tensor([0, 2, 1])]; tensor squeeze_6 = const()[name = string("squeeze_6"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528793792)))]; string var_4189_pad_type_0 = const()[name = string("op_4189_pad_type_0"), val = string("valid")]; int32 var_4189_groups_0 = const()[name = string("op_4189_groups_0"), val = int32(1)]; tensor var_4189_strides_0 = const()[name = string("op_4189_strides_0"), val = tensor([1])]; tensor var_4189_pad_0 = const()[name = string("op_4189_pad_0"), val = tensor([0, 0])]; tensor var_4189_dilations_0 = const()[name = string("op_4189_dilations_0"), val = tensor([1])]; tensor var_4174 = transpose(perm = var_4173, x = attn_output_39)[name = string("transpose_104")]; tensor var_4189 = conv(dilations = var_4189_dilations_0, groups = var_4189_groups_0, pad = var_4189_pad_0, pad_type = var_4189_pad_type_0, strides = var_4189_strides_0, weight = squeeze_6, x = var_4174)[name = string("op_4189")]; tensor var_4193 = const()[name = string("op_4193"), val = tensor([0, 2, 1])]; int32 var_4200 = const()[name = string("op_4200"), val = int32(-1)]; fp16 const_92_promoted_to_fp16 = const()[name = string("const_92_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_105 = transpose(perm = var_4193, x = var_4189)[name = string("transpose_103")]; tensor var_4206_cast_fp16 = mul(x = x_105, y = const_92_promoted_to_fp16)[name = string("op_4206_cast_fp16")]; bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; tensor input_131_cast_fp16 = concat(axis = var_4200, interleave = input_131_interleave_0, values = (x_105, var_4206_cast_fp16))[name = string("input_131_cast_fp16")]; tensor normed_183_axes_0 = const()[name = string("normed_183_axes_0"), val = tensor([-1])]; fp16 var_4198_to_fp16 = const()[name = string("op_4198_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_183_cast_fp16 = layer_norm(axes = normed_183_axes_0, epsilon = var_4198_to_fp16, x = input_131_cast_fp16)[name = string("normed_183_cast_fp16")]; tensor var_4211_split_sizes_0 = const()[name = string("op_4211_split_sizes_0"), val = tensor([640, 640])]; int32 var_4211_axis_0 = const()[name = string("op_4211_axis_0"), val = int32(-1)]; tensor var_4211_cast_fp16_0, tensor var_4211_cast_fp16_1 = split(axis = var_4211_axis_0, split_sizes = var_4211_split_sizes_0, x = normed_183_cast_fp16)[name = string("op_4211_cast_fp16")]; tensor var_4215_to_fp16 = const()[name = string("op_4215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530104576)))]; tensor out_79_cast_fp16 = mul(x = var_4211_cast_fp16_0, y = var_4215_to_fp16)[name = string("out_79_cast_fp16")]; tensor x_107_cast_fp16 = add(x = x_97_cast_fp16, y = out_79_cast_fp16)[name = string("x_107_cast_fp16")]; int32 var_4229 = const()[name = string("op_4229"), val = int32(-1)]; fp16 const_94_promoted_to_fp16 = const()[name = string("const_94_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4235_cast_fp16 = mul(x = x_107_cast_fp16, y = const_94_promoted_to_fp16)[name = string("op_4235_cast_fp16")]; bool input_133_interleave_0 = const()[name = string("input_133_interleave_0"), val = bool(false)]; tensor input_133_cast_fp16 = concat(axis = var_4229, interleave = input_133_interleave_0, values = (x_107_cast_fp16, var_4235_cast_fp16))[name = string("input_133_cast_fp16")]; tensor normed_187_axes_0 = const()[name = string("normed_187_axes_0"), val = tensor([-1])]; fp16 var_4227_to_fp16 = const()[name = string("op_4227_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_187_cast_fp16 = layer_norm(axes = normed_187_axes_0, epsilon = var_4227_to_fp16, x = input_133_cast_fp16)[name = string("normed_187_cast_fp16")]; tensor var_4240_split_sizes_0 = const()[name = string("op_4240_split_sizes_0"), val = tensor([640, 640])]; int32 var_4240_axis_0 = const()[name = string("op_4240_axis_0"), val = int32(-1)]; tensor var_4240_cast_fp16_0, tensor var_4240_cast_fp16_1 = split(axis = var_4240_axis_0, split_sizes = var_4240_split_sizes_0, x = normed_187_cast_fp16)[name = string("op_4240_cast_fp16")]; tensor var_4244_to_fp16 = const()[name = string("op_4244_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530105920)))]; tensor out_81_cast_fp16 = mul(x = var_4240_cast_fp16_0, y = var_4244_to_fp16)[name = string("out_81_cast_fp16")]; tensor var_4258 = const()[name = string("op_4258"), val = tensor([0, 2, 1])]; tensor input_135_axes_0 = const()[name = string("input_135_axes_0"), val = tensor([2])]; tensor var_4259 = transpose(perm = var_4258, x = out_81_cast_fp16)[name = string("transpose_102")]; tensor input_135 = expand_dims(axes = input_135_axes_0, x = var_4259)[name = string("input_135")]; string gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")]; tensor gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor([1, 1])]; tensor gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor([1, 1])]; int32 gate_25_groups_0 = const()[name = string("gate_25_groups_0"), val = int32(1)]; tensor gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, strides = gate_25_strides_0, weight = layers_6_mlp_gate_proj_weight, x = input_135)[name = string("gate_25")]; string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")]; tensor up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor([1, 1])]; tensor up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor([1, 1])]; int32 up_13_groups_0 = const()[name = string("up_13_groups_0"), val = int32(1)]; tensor up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = layers_6_mlp_up_proj_weight, x = input_135)[name = string("up_13")]; string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")]; tensor input_137 = mul(x = gate_27, y = up_13)[name = string("input_137")]; string mlp_out_25_pad_type_0 = const()[name = string("mlp_out_25_pad_type_0"), val = string("valid")]; tensor mlp_out_25_strides_0 = const()[name = string("mlp_out_25_strides_0"), val = tensor([1, 1])]; tensor mlp_out_25_pad_0 = const()[name = string("mlp_out_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_25_dilations_0 = const()[name = string("mlp_out_25_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_25_groups_0 = const()[name = string("mlp_out_25_groups_0"), val = int32(1)]; tensor mlp_out_25 = conv(dilations = mlp_out_25_dilations_0, groups = mlp_out_25_groups_0, pad = mlp_out_25_pad_0, pad_type = mlp_out_25_pad_type_0, strides = mlp_out_25_strides_0, weight = layers_6_mlp_down_proj_weight, x = input_137)[name = string("mlp_out_25")]; tensor var_4299_axes_0 = const()[name = string("op_4299_axes_0"), val = tensor([2])]; tensor var_4299 = squeeze(axes = var_4299_axes_0, x = mlp_out_25)[name = string("op_4299")]; tensor var_4303 = const()[name = string("op_4303"), val = tensor([0, 2, 1])]; int32 var_4310 = const()[name = string("op_4310"), val = int32(-1)]; fp16 const_96_promoted_to_fp16 = const()[name = string("const_96_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_111 = transpose(perm = var_4303, x = var_4299)[name = string("transpose_101")]; tensor var_4316_cast_fp16 = mul(x = x_111, y = const_96_promoted_to_fp16)[name = string("op_4316_cast_fp16")]; bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)]; tensor input_139_cast_fp16 = concat(axis = var_4310, interleave = input_139_interleave_0, values = (x_111, var_4316_cast_fp16))[name = string("input_139_cast_fp16")]; tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; fp16 var_4308_to_fp16 = const()[name = string("op_4308_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_4308_to_fp16, x = input_139_cast_fp16)[name = string("normed_193_cast_fp16")]; tensor var_4321_split_sizes_0 = const()[name = string("op_4321_split_sizes_0"), val = tensor([640, 640])]; int32 var_4321_axis_0 = const()[name = string("op_4321_axis_0"), val = int32(-1)]; tensor var_4321_cast_fp16_0, tensor var_4321_cast_fp16_1 = split(axis = var_4321_axis_0, split_sizes = var_4321_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_4321_cast_fp16")]; tensor var_4325_to_fp16 = const()[name = string("op_4325_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530107264)))]; tensor out_83_cast_fp16 = mul(x = var_4321_cast_fp16_0, y = var_4325_to_fp16)[name = string("out_83_cast_fp16")]; tensor x_113_cast_fp16 = add(x = x_107_cast_fp16, y = out_83_cast_fp16)[name = string("x_113_cast_fp16")]; int32 var_4339 = const()[name = string("op_4339"), val = int32(-1)]; fp16 const_98_promoted_to_fp16 = const()[name = string("const_98_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4345_cast_fp16 = mul(x = x_113_cast_fp16, y = const_98_promoted_to_fp16)[name = string("op_4345_cast_fp16")]; bool input_141_interleave_0 = const()[name = string("input_141_interleave_0"), val = bool(false)]; tensor input_141_cast_fp16 = concat(axis = var_4339, interleave = input_141_interleave_0, values = (x_113_cast_fp16, var_4345_cast_fp16))[name = string("input_141_cast_fp16")]; tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; fp16 var_4337_to_fp16 = const()[name = string("op_4337_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_4337_to_fp16, x = input_141_cast_fp16)[name = string("normed_197_cast_fp16")]; tensor var_4350_split_sizes_0 = const()[name = string("op_4350_split_sizes_0"), val = tensor([640, 640])]; int32 var_4350_axis_0 = const()[name = string("op_4350_axis_0"), val = int32(-1)]; tensor var_4350_cast_fp16_0, tensor var_4350_cast_fp16_1 = split(axis = var_4350_axis_0, split_sizes = var_4350_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_4350_cast_fp16")]; tensor var_4354_to_fp16 = const()[name = string("op_4354_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530108608)))]; tensor out_85_cast_fp16 = mul(x = var_4350_cast_fp16_0, y = var_4354_to_fp16)[name = string("out_85_cast_fp16")]; tensor var_4368 = const()[name = string("op_4368"), val = tensor([0, 2, 1])]; tensor input_143_axes_0 = const()[name = string("input_143_axes_0"), val = tensor([2])]; tensor var_4369 = transpose(perm = var_4368, x = out_85_cast_fp16)[name = string("transpose_100")]; tensor input_143 = expand_dims(axes = input_143_axes_0, x = var_4369)[name = string("input_143")]; string var_4382_pad_type_0 = const()[name = string("op_4382_pad_type_0"), val = string("valid")]; tensor var_4382_strides_0 = const()[name = string("op_4382_strides_0"), val = tensor([1, 1])]; tensor var_4382_pad_0 = const()[name = string("op_4382_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4382_dilations_0 = const()[name = string("op_4382_dilations_0"), val = tensor([1, 1])]; int32 var_4382_groups_0 = const()[name = string("op_4382_groups_0"), val = int32(1)]; tensor var_4382 = conv(dilations = var_4382_dilations_0, groups = var_4382_groups_0, pad = var_4382_pad_0, pad_type = var_4382_pad_type_0, strides = var_4382_strides_0, weight = layers_7_self_attn_q_proj_weight, x = input_143)[name = string("op_4382")]; tensor var_4387 = const()[name = string("op_4387"), val = tensor([1, 4, 256, 1])]; tensor var_4388 = reshape(shape = var_4387, x = var_4382)[name = string("op_4388")]; tensor var_4393 = const()[name = string("op_4393"), val = tensor([0, 1, 3, 2])]; tensor var_4398 = const()[name = string("op_4398"), val = tensor([1, 4, 256])]; tensor q_43 = transpose(perm = var_4393, x = var_4388)[name = string("transpose_99")]; tensor x_117 = reshape(shape = var_4398, x = q_43)[name = string("x_117")]; int32 var_4405 = const()[name = string("op_4405"), val = int32(-1)]; fp16 const_100_promoted_to_fp16 = const()[name = string("const_100_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4411_cast_fp16 = mul(x = x_117, y = const_100_promoted_to_fp16)[name = string("op_4411_cast_fp16")]; bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; tensor input_145_cast_fp16 = concat(axis = var_4405, interleave = input_145_interleave_0, values = (x_117, var_4411_cast_fp16))[name = string("input_145_cast_fp16")]; tensor normed_203_axes_0 = const()[name = string("normed_203_axes_0"), val = tensor([-1])]; fp16 var_4403_to_fp16 = const()[name = string("op_4403_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_203_cast_fp16 = layer_norm(axes = normed_203_axes_0, epsilon = var_4403_to_fp16, x = input_145_cast_fp16)[name = string("normed_203_cast_fp16")]; tensor var_4416_split_sizes_0 = const()[name = string("op_4416_split_sizes_0"), val = tensor([256, 256])]; int32 var_4416_axis_0 = const()[name = string("op_4416_axis_0"), val = int32(-1)]; tensor var_4416_cast_fp16_0, tensor var_4416_cast_fp16_1 = split(axis = var_4416_axis_0, split_sizes = var_4416_split_sizes_0, x = normed_203_cast_fp16)[name = string("op_4416_cast_fp16")]; tensor var_4420_to_fp16 = const()[name = string("op_4420_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530109952)))]; tensor out_87_cast_fp16 = mul(x = var_4416_cast_fp16_0, y = var_4420_to_fp16)[name = string("out_87_cast_fp16")]; tensor var_4427 = const()[name = string("op_4427"), val = tensor([1, 4, 1, 256])]; tensor q_45 = reshape(shape = var_4427, x = out_87_cast_fp16)[name = string("q_45")]; string var_4439_pad_type_0 = const()[name = string("op_4439_pad_type_0"), val = string("valid")]; tensor var_4439_strides_0 = const()[name = string("op_4439_strides_0"), val = tensor([1, 1])]; tensor var_4439_pad_0 = const()[name = string("op_4439_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4439_dilations_0 = const()[name = string("op_4439_dilations_0"), val = tensor([1, 1])]; int32 var_4439_groups_0 = const()[name = string("op_4439_groups_0"), val = int32(1)]; tensor var_4439 = conv(dilations = var_4439_dilations_0, groups = var_4439_groups_0, pad = var_4439_pad_0, pad_type = var_4439_pad_type_0, strides = var_4439_strides_0, weight = layers_7_self_attn_k_proj_weight, x = input_143)[name = string("op_4439")]; tensor var_4444 = const()[name = string("op_4444"), val = tensor([1, 1, 256, 1])]; tensor var_4445 = reshape(shape = var_4444, x = var_4439)[name = string("op_4445")]; tensor var_4450 = const()[name = string("op_4450"), val = tensor([0, 1, 3, 2])]; tensor var_4455 = const()[name = string("op_4455"), val = tensor([1, 1, 256])]; tensor k_43 = transpose(perm = var_4450, x = var_4445)[name = string("transpose_98")]; tensor x_119 = reshape(shape = var_4455, x = k_43)[name = string("x_119")]; int32 var_4462 = const()[name = string("op_4462"), val = int32(-1)]; fp16 const_102_promoted_to_fp16 = const()[name = string("const_102_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4468_cast_fp16 = mul(x = x_119, y = const_102_promoted_to_fp16)[name = string("op_4468_cast_fp16")]; bool input_147_interleave_0 = const()[name = string("input_147_interleave_0"), val = bool(false)]; tensor input_147_cast_fp16 = concat(axis = var_4462, interleave = input_147_interleave_0, values = (x_119, var_4468_cast_fp16))[name = string("input_147_cast_fp16")]; tensor normed_207_axes_0 = const()[name = string("normed_207_axes_0"), val = tensor([-1])]; fp16 var_4460_to_fp16 = const()[name = string("op_4460_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_207_cast_fp16 = layer_norm(axes = normed_207_axes_0, epsilon = var_4460_to_fp16, x = input_147_cast_fp16)[name = string("normed_207_cast_fp16")]; tensor var_4473_split_sizes_0 = const()[name = string("op_4473_split_sizes_0"), val = tensor([256, 256])]; int32 var_4473_axis_0 = const()[name = string("op_4473_axis_0"), val = int32(-1)]; tensor var_4473_cast_fp16_0, tensor var_4473_cast_fp16_1 = split(axis = var_4473_axis_0, split_sizes = var_4473_split_sizes_0, x = normed_207_cast_fp16)[name = string("op_4473_cast_fp16")]; tensor var_4477_to_fp16 = const()[name = string("op_4477_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530110528)))]; tensor out_89_cast_fp16 = mul(x = var_4473_cast_fp16_0, y = var_4477_to_fp16)[name = string("out_89_cast_fp16")]; tensor var_4484 = const()[name = string("op_4484"), val = tensor([1, 1, 1, 256])]; tensor k_45 = reshape(shape = var_4484, x = out_89_cast_fp16)[name = string("k_45")]; string var_4496_pad_type_0 = const()[name = string("op_4496_pad_type_0"), val = string("valid")]; tensor var_4496_strides_0 = const()[name = string("op_4496_strides_0"), val = tensor([1, 1])]; tensor var_4496_pad_0 = const()[name = string("op_4496_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4496_dilations_0 = const()[name = string("op_4496_dilations_0"), val = tensor([1, 1])]; int32 var_4496_groups_0 = const()[name = string("op_4496_groups_0"), val = int32(1)]; tensor var_4496 = conv(dilations = var_4496_dilations_0, groups = var_4496_groups_0, pad = var_4496_pad_0, pad_type = var_4496_pad_type_0, strides = var_4496_strides_0, weight = layers_7_self_attn_v_proj_weight, x = input_143)[name = string("op_4496")]; tensor var_4501 = const()[name = string("op_4501"), val = tensor([1, 1, 256, 1])]; tensor var_4502 = reshape(shape = var_4501, x = var_4496)[name = string("op_4502")]; tensor var_4507 = const()[name = string("op_4507"), val = tensor([0, 1, 3, 2])]; tensor var_4509 = mul(x = q_45, y = cos_1)[name = string("op_4509")]; tensor var_4510_split_sizes_0 = const()[name = string("op_4510_split_sizes_0"), val = tensor([128, 128])]; int32 var_4510_axis_0 = const()[name = string("op_4510_axis_0"), val = int32(-1)]; tensor var_4510_0, tensor var_4510_1 = split(axis = var_4510_axis_0, split_sizes = var_4510_split_sizes_0, x = q_45)[name = string("op_4510")]; fp16 const_104_promoted = const()[name = string("const_104_promoted"), val = fp16(-0x1p+0)]; tensor var_4512 = mul(x = var_4510_1, y = const_104_promoted)[name = string("op_4512")]; int32 var_4514 = const()[name = string("op_4514"), val = int32(-1)]; bool var_4515_interleave_0 = const()[name = string("op_4515_interleave_0"), val = bool(false)]; tensor var_4515 = concat(axis = var_4514, interleave = var_4515_interleave_0, values = (var_4512, var_4510_0))[name = string("op_4515")]; tensor var_4516 = mul(x = var_4515, y = sin_1)[name = string("op_4516")]; tensor q_47 = add(x = var_4509, y = var_4516)[name = string("q_47")]; tensor var_4519 = mul(x = k_45, y = cos_1)[name = string("op_4519")]; tensor var_4520_split_sizes_0 = const()[name = string("op_4520_split_sizes_0"), val = tensor([128, 128])]; int32 var_4520_axis_0 = const()[name = string("op_4520_axis_0"), val = int32(-1)]; tensor var_4520_0, tensor var_4520_1 = split(axis = var_4520_axis_0, split_sizes = var_4520_split_sizes_0, x = k_45)[name = string("op_4520")]; fp16 const_105_promoted = const()[name = string("const_105_promoted"), val = fp16(-0x1p+0)]; tensor var_4522 = mul(x = var_4520_1, y = const_105_promoted)[name = string("op_4522")]; int32 var_4524 = const()[name = string("op_4524"), val = int32(-1)]; bool var_4525_interleave_0 = const()[name = string("op_4525_interleave_0"), val = bool(false)]; tensor var_4525 = concat(axis = var_4524, interleave = var_4525_interleave_0, values = (var_4522, var_4520_0))[name = string("op_4525")]; tensor var_4526 = mul(x = var_4525, y = sin_1)[name = string("op_4526")]; tensor k_47 = add(x = var_4519, y = var_4526)[name = string("k_47")]; tensor var_4531_begin_0 = const()[name = string("op_4531_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_4531_end_0 = const()[name = string("op_4531_end_0"), val = tensor([8, 1, 2048, 256])]; tensor var_4531_end_mask_0 = const()[name = string("op_4531_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4531_squeeze_mask_0 = const()[name = string("op_4531_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_4531_cast_fp16 = slice_by_index(begin = var_4531_begin_0, end = var_4531_end_0, end_mask = var_4531_end_mask_0, squeeze_mask = var_4531_squeeze_mask_0, x = coreml_update_state_49)[name = string("op_4531_cast_fp16")]; tensor K_cache_15_axes_0 = const()[name = string("K_cache_15_axes_0"), val = tensor([0])]; tensor K_cache_15_cast_fp16 = expand_dims(axes = K_cache_15_axes_0, x = var_4531_cast_fp16)[name = string("K_cache_15_cast_fp16")]; tensor var_4536_begin_0 = const()[name = string("op_4536_begin_0"), val = tensor([25, 0, 0, 0])]; tensor var_4536_end_0 = const()[name = string("op_4536_end_0"), val = tensor([26, 1, 2048, 256])]; tensor var_4536_end_mask_0 = const()[name = string("op_4536_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4536_squeeze_mask_0 = const()[name = string("op_4536_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_4536_cast_fp16 = slice_by_index(begin = var_4536_begin_0, end = var_4536_end_0, end_mask = var_4536_end_mask_0, squeeze_mask = var_4536_squeeze_mask_0, x = coreml_update_state_49)[name = string("op_4536_cast_fp16")]; tensor V_cache_15_axes_0 = const()[name = string("V_cache_15_axes_0"), val = tensor([0])]; tensor V_cache_15_cast_fp16 = expand_dims(axes = V_cache_15_axes_0, x = var_4536_cast_fp16)[name = string("V_cache_15_cast_fp16")]; tensor k_broadcast_15_reps_0 = const()[name = string("k_broadcast_15_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_15 = tile(reps = k_broadcast_15_reps_0, x = k_47)[name = string("k_broadcast_15")]; tensor v_broadcast_15_reps_0 = const()[name = string("v_broadcast_15_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_15 = transpose(perm = var_4507, x = var_4502)[name = string("transpose_97")]; tensor v_broadcast_15 = tile(reps = v_broadcast_15_reps_0, x = v_15)[name = string("v_broadcast_15")]; tensor var_4544_cast_fp16 = mul(x = K_cache_15_cast_fp16, y = var_1120_cast_fp16)[name = string("op_4544_cast_fp16")]; tensor var_4545_cast_fp16 = mul(x = k_broadcast_15, y = update_mask)[name = string("op_4545_cast_fp16")]; tensor K_new_15_cast_fp16 = add(x = var_4544_cast_fp16, y = var_4545_cast_fp16)[name = string("K_new_15_cast_fp16")]; tensor var_4551_cast_fp16 = mul(x = V_cache_15_cast_fp16, y = var_1120_cast_fp16)[name = string("op_4551_cast_fp16")]; tensor var_4552_cast_fp16 = mul(x = v_broadcast_15, y = update_mask)[name = string("op_4552_cast_fp16")]; tensor V_new_15_cast_fp16 = add(x = var_4551_cast_fp16, y = var_4552_cast_fp16)[name = string("V_new_15_cast_fp16")]; tensor var_4556_axes_0 = const()[name = string("op_4556_axes_0"), val = tensor([0])]; tensor var_4556_cast_fp16 = squeeze(axes = var_4556_axes_0, x = K_new_15_cast_fp16)[name = string("op_4556_cast_fp16")]; tensor concat_28 = const()[name = string("concat_28"), val = tensor([7, 0, 0, 0])]; tensor concat_29 = const()[name = string("concat_29"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_28, begin_mask = kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_29, end_mask = kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_15_stride_0, update = var_4556_cast_fp16, x = coreml_update_state_49)[name = string("kv_cache_0_internal_tensor_assign_15_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_15_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_50_write_state")]; tensor coreml_update_state_50 = read_state(input = kv_cache_0)[name = string("coreml_update_state_50")]; tensor var_4563_axes_0 = const()[name = string("op_4563_axes_0"), val = tensor([0])]; tensor var_4563_cast_fp16 = squeeze(axes = var_4563_axes_0, x = V_new_15_cast_fp16)[name = string("op_4563_cast_fp16")]; tensor concat_30 = const()[name = string("concat_30"), val = tensor([25, 0, 0, 0])]; tensor concat_31 = const()[name = string("concat_31"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_30, begin_mask = kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_31, end_mask = kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_16_stride_0, update = var_4563_cast_fp16, x = coreml_update_state_50)[name = string("kv_cache_0_internal_tensor_assign_16_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_16_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_51_write_state")]; tensor coreml_update_state_51 = read_state(input = kv_cache_0)[name = string("coreml_update_state_51")]; tensor hidden_states_59_axes_0 = const()[name = string("hidden_states_59_axes_0"), val = tensor([2])]; tensor hidden_states_59_cast_fp16 = expand_dims(axes = hidden_states_59_axes_0, x = K_new_15_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; tensor var_4576 = const()[name = string("op_4576"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_61_cast_fp16 = tile(reps = var_4576, x = hidden_states_59_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; tensor var_4582 = const()[name = string("op_4582"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_15_cast_fp16 = reshape(shape = var_4582, x = hidden_states_61_cast_fp16)[name = string("K_expanded_15_cast_fp16")]; tensor hidden_states_63_axes_0 = const()[name = string("hidden_states_63_axes_0"), val = tensor([2])]; tensor hidden_states_63_cast_fp16 = expand_dims(axes = hidden_states_63_axes_0, x = V_new_15_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; tensor var_4591 = const()[name = string("op_4591"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_65_cast_fp16 = tile(reps = var_4591, x = hidden_states_63_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; tensor var_4597 = const()[name = string("op_4597"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_15_cast_fp16 = reshape(shape = var_4597, x = hidden_states_65_cast_fp16)[name = string("V_expanded_15_cast_fp16")]; bool var_4612_transpose_x_1 = const()[name = string("op_4612_transpose_x_1"), val = bool(false)]; bool var_4612_transpose_y_1 = const()[name = string("op_4612_transpose_y_1"), val = bool(true)]; tensor var_4612_cast_fp16 = matmul(transpose_x = var_4612_transpose_x_1, transpose_y = var_4612_transpose_y_1, x = q_47, y = K_expanded_15_cast_fp16)[name = string("op_4612_cast_fp16")]; fp16 var_4613_to_fp16 = const()[name = string("op_4613_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_43_cast_fp16 = mul(x = var_4612_cast_fp16, y = var_4613_to_fp16)[name = string("attn_weights_43_cast_fp16")]; tensor attn_weights_45_cast_fp16 = add(x = attn_weights_43_cast_fp16, y = causal_mask)[name = string("attn_weights_45_cast_fp16")]; int32 var_4622 = const()[name = string("op_4622"), val = int32(-1)]; tensor var_4624_cast_fp16 = softmax(axis = var_4622, x = attn_weights_45_cast_fp16)[name = string("op_4624_cast_fp16")]; bool var_4640_transpose_x_0 = const()[name = string("op_4640_transpose_x_0"), val = bool(false)]; bool var_4640_transpose_y_0 = const()[name = string("op_4640_transpose_y_0"), val = bool(false)]; tensor var_4640_cast_fp16 = matmul(transpose_x = var_4640_transpose_x_0, transpose_y = var_4640_transpose_y_0, x = var_4624_cast_fp16, y = V_expanded_15_cast_fp16)[name = string("op_4640_cast_fp16")]; tensor var_4650 = const()[name = string("op_4650"), val = tensor([0, 2, 1, 3])]; tensor var_4657 = const()[name = string("op_4657"), val = tensor([1, 1, -1])]; tensor var_4651 = transpose(perm = var_4650, x = var_4640_cast_fp16)[name = string("transpose_96")]; tensor attn_output_45 = reshape(shape = var_4657, x = var_4651)[name = string("attn_output_45")]; tensor var_4662 = const()[name = string("op_4662"), val = tensor([0, 2, 1])]; tensor squeeze_7 = const()[name = string("squeeze_7"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530111104)))]; string var_4678_pad_type_0 = const()[name = string("op_4678_pad_type_0"), val = string("valid")]; int32 var_4678_groups_0 = const()[name = string("op_4678_groups_0"), val = int32(1)]; tensor var_4678_strides_0 = const()[name = string("op_4678_strides_0"), val = tensor([1])]; tensor var_4678_pad_0 = const()[name = string("op_4678_pad_0"), val = tensor([0, 0])]; tensor var_4678_dilations_0 = const()[name = string("op_4678_dilations_0"), val = tensor([1])]; tensor var_4663 = transpose(perm = var_4662, x = attn_output_45)[name = string("transpose_95")]; tensor var_4678 = conv(dilations = var_4678_dilations_0, groups = var_4678_groups_0, pad = var_4678_pad_0, pad_type = var_4678_pad_type_0, strides = var_4678_strides_0, weight = squeeze_7, x = var_4663)[name = string("op_4678")]; tensor var_4682 = const()[name = string("op_4682"), val = tensor([0, 2, 1])]; int32 var_4689 = const()[name = string("op_4689"), val = int32(-1)]; fp16 const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_121 = transpose(perm = var_4682, x = var_4678)[name = string("transpose_94")]; tensor var_4695_cast_fp16 = mul(x = x_121, y = const_106_promoted_to_fp16)[name = string("op_4695_cast_fp16")]; bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)]; tensor input_151_cast_fp16 = concat(axis = var_4689, interleave = input_151_interleave_0, values = (x_121, var_4695_cast_fp16))[name = string("input_151_cast_fp16")]; tensor normed_211_axes_0 = const()[name = string("normed_211_axes_0"), val = tensor([-1])]; fp16 var_4687_to_fp16 = const()[name = string("op_4687_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_211_cast_fp16 = layer_norm(axes = normed_211_axes_0, epsilon = var_4687_to_fp16, x = input_151_cast_fp16)[name = string("normed_211_cast_fp16")]; tensor var_4700_split_sizes_0 = const()[name = string("op_4700_split_sizes_0"), val = tensor([640, 640])]; int32 var_4700_axis_0 = const()[name = string("op_4700_axis_0"), val = int32(-1)]; tensor var_4700_cast_fp16_0, tensor var_4700_cast_fp16_1 = split(axis = var_4700_axis_0, split_sizes = var_4700_split_sizes_0, x = normed_211_cast_fp16)[name = string("op_4700_cast_fp16")]; tensor var_4704_to_fp16 = const()[name = string("op_4704_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531421888)))]; tensor out_91_cast_fp16 = mul(x = var_4700_cast_fp16_0, y = var_4704_to_fp16)[name = string("out_91_cast_fp16")]; tensor x_123_cast_fp16 = add(x = x_113_cast_fp16, y = out_91_cast_fp16)[name = string("x_123_cast_fp16")]; int32 var_4718 = const()[name = string("op_4718"), val = int32(-1)]; fp16 const_108_promoted_to_fp16 = const()[name = string("const_108_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4724_cast_fp16 = mul(x = x_123_cast_fp16, y = const_108_promoted_to_fp16)[name = string("op_4724_cast_fp16")]; bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; tensor input_153_cast_fp16 = concat(axis = var_4718, interleave = input_153_interleave_0, values = (x_123_cast_fp16, var_4724_cast_fp16))[name = string("input_153_cast_fp16")]; tensor normed_215_axes_0 = const()[name = string("normed_215_axes_0"), val = tensor([-1])]; fp16 var_4716_to_fp16 = const()[name = string("op_4716_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_215_cast_fp16 = layer_norm(axes = normed_215_axes_0, epsilon = var_4716_to_fp16, x = input_153_cast_fp16)[name = string("normed_215_cast_fp16")]; tensor var_4729_split_sizes_0 = const()[name = string("op_4729_split_sizes_0"), val = tensor([640, 640])]; int32 var_4729_axis_0 = const()[name = string("op_4729_axis_0"), val = int32(-1)]; tensor var_4729_cast_fp16_0, tensor var_4729_cast_fp16_1 = split(axis = var_4729_axis_0, split_sizes = var_4729_split_sizes_0, x = normed_215_cast_fp16)[name = string("op_4729_cast_fp16")]; tensor var_4733_to_fp16 = const()[name = string("op_4733_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531423232)))]; tensor out_93_cast_fp16 = mul(x = var_4729_cast_fp16_0, y = var_4733_to_fp16)[name = string("out_93_cast_fp16")]; tensor var_4747 = const()[name = string("op_4747"), val = tensor([0, 2, 1])]; tensor input_155_axes_0 = const()[name = string("input_155_axes_0"), val = tensor([2])]; tensor var_4748 = transpose(perm = var_4747, x = out_93_cast_fp16)[name = string("transpose_93")]; tensor input_155 = expand_dims(axes = input_155_axes_0, x = var_4748)[name = string("input_155")]; string gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")]; tensor gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor([1, 1])]; tensor gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val = tensor([1, 1])]; int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)]; tensor gate_29 = conv(dilations = gate_29_dilations_0, groups = gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = layers_7_mlp_gate_proj_weight, x = input_155)[name = string("gate_29")]; string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")]; tensor up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor([1, 1])]; tensor up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor([1, 1])]; int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)]; tensor up_15 = conv(dilations = up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = layers_7_mlp_up_proj_weight, x = input_155)[name = string("up_15")]; string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")]; tensor input_157 = mul(x = gate_31, y = up_15)[name = string("input_157")]; string mlp_out_29_pad_type_0 = const()[name = string("mlp_out_29_pad_type_0"), val = string("valid")]; tensor mlp_out_29_strides_0 = const()[name = string("mlp_out_29_strides_0"), val = tensor([1, 1])]; tensor mlp_out_29_pad_0 = const()[name = string("mlp_out_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_29_dilations_0 = const()[name = string("mlp_out_29_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_29_groups_0 = const()[name = string("mlp_out_29_groups_0"), val = int32(1)]; tensor mlp_out_29 = conv(dilations = mlp_out_29_dilations_0, groups = mlp_out_29_groups_0, pad = mlp_out_29_pad_0, pad_type = mlp_out_29_pad_type_0, strides = mlp_out_29_strides_0, weight = layers_7_mlp_down_proj_weight, x = input_157)[name = string("mlp_out_29")]; tensor var_4788_axes_0 = const()[name = string("op_4788_axes_0"), val = tensor([2])]; tensor var_4788 = squeeze(axes = var_4788_axes_0, x = mlp_out_29)[name = string("op_4788")]; tensor var_4792 = const()[name = string("op_4792"), val = tensor([0, 2, 1])]; int32 var_4799 = const()[name = string("op_4799"), val = int32(-1)]; fp16 const_110_promoted_to_fp16 = const()[name = string("const_110_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_127 = transpose(perm = var_4792, x = var_4788)[name = string("transpose_92")]; tensor var_4805_cast_fp16 = mul(x = x_127, y = const_110_promoted_to_fp16)[name = string("op_4805_cast_fp16")]; bool input_159_interleave_0 = const()[name = string("input_159_interleave_0"), val = bool(false)]; tensor input_159_cast_fp16 = concat(axis = var_4799, interleave = input_159_interleave_0, values = (x_127, var_4805_cast_fp16))[name = string("input_159_cast_fp16")]; tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; fp16 var_4797_to_fp16 = const()[name = string("op_4797_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_4797_to_fp16, x = input_159_cast_fp16)[name = string("normed_221_cast_fp16")]; tensor var_4810_split_sizes_0 = const()[name = string("op_4810_split_sizes_0"), val = tensor([640, 640])]; int32 var_4810_axis_0 = const()[name = string("op_4810_axis_0"), val = int32(-1)]; tensor var_4810_cast_fp16_0, tensor var_4810_cast_fp16_1 = split(axis = var_4810_axis_0, split_sizes = var_4810_split_sizes_0, x = normed_221_cast_fp16)[name = string("op_4810_cast_fp16")]; tensor var_4814_to_fp16 = const()[name = string("op_4814_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531424576)))]; tensor out_95_cast_fp16 = mul(x = var_4810_cast_fp16_0, y = var_4814_to_fp16)[name = string("out_95_cast_fp16")]; tensor x_129_cast_fp16 = add(x = x_123_cast_fp16, y = out_95_cast_fp16)[name = string("x_129_cast_fp16")]; int32 var_4828 = const()[name = string("op_4828"), val = int32(-1)]; fp16 const_112_promoted_to_fp16 = const()[name = string("const_112_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4834_cast_fp16 = mul(x = x_129_cast_fp16, y = const_112_promoted_to_fp16)[name = string("op_4834_cast_fp16")]; bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; tensor input_161_cast_fp16 = concat(axis = var_4828, interleave = input_161_interleave_0, values = (x_129_cast_fp16, var_4834_cast_fp16))[name = string("input_161_cast_fp16")]; tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; fp16 var_4826_to_fp16 = const()[name = string("op_4826_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_4826_to_fp16, x = input_161_cast_fp16)[name = string("normed_225_cast_fp16")]; tensor var_4839_split_sizes_0 = const()[name = string("op_4839_split_sizes_0"), val = tensor([640, 640])]; int32 var_4839_axis_0 = const()[name = string("op_4839_axis_0"), val = int32(-1)]; tensor var_4839_cast_fp16_0, tensor var_4839_cast_fp16_1 = split(axis = var_4839_axis_0, split_sizes = var_4839_split_sizes_0, x = normed_225_cast_fp16)[name = string("op_4839_cast_fp16")]; tensor var_4843_to_fp16 = const()[name = string("op_4843_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531425920)))]; tensor out_97_cast_fp16 = mul(x = var_4839_cast_fp16_0, y = var_4843_to_fp16)[name = string("out_97_cast_fp16")]; tensor var_4857 = const()[name = string("op_4857"), val = tensor([0, 2, 1])]; tensor input_163_axes_0 = const()[name = string("input_163_axes_0"), val = tensor([2])]; tensor var_4858 = transpose(perm = var_4857, x = out_97_cast_fp16)[name = string("transpose_91")]; tensor input_163 = expand_dims(axes = input_163_axes_0, x = var_4858)[name = string("input_163")]; string var_4871_pad_type_0 = const()[name = string("op_4871_pad_type_0"), val = string("valid")]; tensor var_4871_strides_0 = const()[name = string("op_4871_strides_0"), val = tensor([1, 1])]; tensor var_4871_pad_0 = const()[name = string("op_4871_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4871_dilations_0 = const()[name = string("op_4871_dilations_0"), val = tensor([1, 1])]; int32 var_4871_groups_0 = const()[name = string("op_4871_groups_0"), val = int32(1)]; tensor var_4871 = conv(dilations = var_4871_dilations_0, groups = var_4871_groups_0, pad = var_4871_pad_0, pad_type = var_4871_pad_type_0, strides = var_4871_strides_0, weight = layers_8_self_attn_q_proj_weight, x = input_163)[name = string("op_4871")]; tensor var_4876 = const()[name = string("op_4876"), val = tensor([1, 4, 256, 1])]; tensor var_4877 = reshape(shape = var_4876, x = var_4871)[name = string("op_4877")]; tensor var_4882 = const()[name = string("op_4882"), val = tensor([0, 1, 3, 2])]; tensor var_4887 = const()[name = string("op_4887"), val = tensor([1, 4, 256])]; tensor q_49 = transpose(perm = var_4882, x = var_4877)[name = string("transpose_90")]; tensor x_133 = reshape(shape = var_4887, x = q_49)[name = string("x_133")]; int32 var_4894 = const()[name = string("op_4894"), val = int32(-1)]; fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4900_cast_fp16 = mul(x = x_133, y = const_114_promoted_to_fp16)[name = string("op_4900_cast_fp16")]; bool input_165_interleave_0 = const()[name = string("input_165_interleave_0"), val = bool(false)]; tensor input_165_cast_fp16 = concat(axis = var_4894, interleave = input_165_interleave_0, values = (x_133, var_4900_cast_fp16))[name = string("input_165_cast_fp16")]; tensor normed_231_axes_0 = const()[name = string("normed_231_axes_0"), val = tensor([-1])]; fp16 var_4892_to_fp16 = const()[name = string("op_4892_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_231_cast_fp16 = layer_norm(axes = normed_231_axes_0, epsilon = var_4892_to_fp16, x = input_165_cast_fp16)[name = string("normed_231_cast_fp16")]; tensor var_4905_split_sizes_0 = const()[name = string("op_4905_split_sizes_0"), val = tensor([256, 256])]; int32 var_4905_axis_0 = const()[name = string("op_4905_axis_0"), val = int32(-1)]; tensor var_4905_cast_fp16_0, tensor var_4905_cast_fp16_1 = split(axis = var_4905_axis_0, split_sizes = var_4905_split_sizes_0, x = normed_231_cast_fp16)[name = string("op_4905_cast_fp16")]; tensor var_4909_to_fp16 = const()[name = string("op_4909_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531427264)))]; tensor out_99_cast_fp16 = mul(x = var_4905_cast_fp16_0, y = var_4909_to_fp16)[name = string("out_99_cast_fp16")]; tensor var_4916 = const()[name = string("op_4916"), val = tensor([1, 4, 1, 256])]; tensor q_51 = reshape(shape = var_4916, x = out_99_cast_fp16)[name = string("q_51")]; string var_4928_pad_type_0 = const()[name = string("op_4928_pad_type_0"), val = string("valid")]; tensor var_4928_strides_0 = const()[name = string("op_4928_strides_0"), val = tensor([1, 1])]; tensor var_4928_pad_0 = const()[name = string("op_4928_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4928_dilations_0 = const()[name = string("op_4928_dilations_0"), val = tensor([1, 1])]; int32 var_4928_groups_0 = const()[name = string("op_4928_groups_0"), val = int32(1)]; tensor var_4928 = conv(dilations = var_4928_dilations_0, groups = var_4928_groups_0, pad = var_4928_pad_0, pad_type = var_4928_pad_type_0, strides = var_4928_strides_0, weight = layers_8_self_attn_k_proj_weight, x = input_163)[name = string("op_4928")]; tensor var_4933 = const()[name = string("op_4933"), val = tensor([1, 1, 256, 1])]; tensor var_4934 = reshape(shape = var_4933, x = var_4928)[name = string("op_4934")]; tensor var_4939 = const()[name = string("op_4939"), val = tensor([0, 1, 3, 2])]; tensor var_4944 = const()[name = string("op_4944"), val = tensor([1, 1, 256])]; tensor k_49 = transpose(perm = var_4939, x = var_4934)[name = string("transpose_89")]; tensor x_135 = reshape(shape = var_4944, x = k_49)[name = string("x_135")]; int32 var_4951 = const()[name = string("op_4951"), val = int32(-1)]; fp16 const_116_promoted_to_fp16 = const()[name = string("const_116_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4957_cast_fp16 = mul(x = x_135, y = const_116_promoted_to_fp16)[name = string("op_4957_cast_fp16")]; bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; tensor input_167_cast_fp16 = concat(axis = var_4951, interleave = input_167_interleave_0, values = (x_135, var_4957_cast_fp16))[name = string("input_167_cast_fp16")]; tensor normed_235_axes_0 = const()[name = string("normed_235_axes_0"), val = tensor([-1])]; fp16 var_4949_to_fp16 = const()[name = string("op_4949_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_235_cast_fp16 = layer_norm(axes = normed_235_axes_0, epsilon = var_4949_to_fp16, x = input_167_cast_fp16)[name = string("normed_235_cast_fp16")]; tensor var_4962_split_sizes_0 = const()[name = string("op_4962_split_sizes_0"), val = tensor([256, 256])]; int32 var_4962_axis_0 = const()[name = string("op_4962_axis_0"), val = int32(-1)]; tensor var_4962_cast_fp16_0, tensor var_4962_cast_fp16_1 = split(axis = var_4962_axis_0, split_sizes = var_4962_split_sizes_0, x = normed_235_cast_fp16)[name = string("op_4962_cast_fp16")]; tensor var_4966_to_fp16 = const()[name = string("op_4966_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531427840)))]; tensor out_101_cast_fp16 = mul(x = var_4962_cast_fp16_0, y = var_4966_to_fp16)[name = string("out_101_cast_fp16")]; tensor var_4973 = const()[name = string("op_4973"), val = tensor([1, 1, 1, 256])]; tensor k_51 = reshape(shape = var_4973, x = out_101_cast_fp16)[name = string("k_51")]; string var_4985_pad_type_0 = const()[name = string("op_4985_pad_type_0"), val = string("valid")]; tensor var_4985_strides_0 = const()[name = string("op_4985_strides_0"), val = tensor([1, 1])]; tensor var_4985_pad_0 = const()[name = string("op_4985_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4985_dilations_0 = const()[name = string("op_4985_dilations_0"), val = tensor([1, 1])]; int32 var_4985_groups_0 = const()[name = string("op_4985_groups_0"), val = int32(1)]; tensor var_4985 = conv(dilations = var_4985_dilations_0, groups = var_4985_groups_0, pad = var_4985_pad_0, pad_type = var_4985_pad_type_0, strides = var_4985_strides_0, weight = layers_8_self_attn_v_proj_weight, x = input_163)[name = string("op_4985")]; tensor var_4990 = const()[name = string("op_4990"), val = tensor([1, 1, 256, 1])]; tensor var_4991 = reshape(shape = var_4990, x = var_4985)[name = string("op_4991")]; tensor var_4996 = const()[name = string("op_4996"), val = tensor([0, 1, 3, 2])]; tensor var_4998 = mul(x = q_51, y = cos_1)[name = string("op_4998")]; tensor var_4999_split_sizes_0 = const()[name = string("op_4999_split_sizes_0"), val = tensor([128, 128])]; int32 var_4999_axis_0 = const()[name = string("op_4999_axis_0"), val = int32(-1)]; tensor var_4999_0, tensor var_4999_1 = split(axis = var_4999_axis_0, split_sizes = var_4999_split_sizes_0, x = q_51)[name = string("op_4999")]; fp16 const_118_promoted = const()[name = string("const_118_promoted"), val = fp16(-0x1p+0)]; tensor var_5001 = mul(x = var_4999_1, y = const_118_promoted)[name = string("op_5001")]; int32 var_5003 = const()[name = string("op_5003"), val = int32(-1)]; bool var_5004_interleave_0 = const()[name = string("op_5004_interleave_0"), val = bool(false)]; tensor var_5004 = concat(axis = var_5003, interleave = var_5004_interleave_0, values = (var_5001, var_4999_0))[name = string("op_5004")]; tensor var_5005 = mul(x = var_5004, y = sin_1)[name = string("op_5005")]; tensor q_53 = add(x = var_4998, y = var_5005)[name = string("q_53")]; tensor var_5008 = mul(x = k_51, y = cos_1)[name = string("op_5008")]; tensor var_5009_split_sizes_0 = const()[name = string("op_5009_split_sizes_0"), val = tensor([128, 128])]; int32 var_5009_axis_0 = const()[name = string("op_5009_axis_0"), val = int32(-1)]; tensor var_5009_0, tensor var_5009_1 = split(axis = var_5009_axis_0, split_sizes = var_5009_split_sizes_0, x = k_51)[name = string("op_5009")]; fp16 const_119_promoted = const()[name = string("const_119_promoted"), val = fp16(-0x1p+0)]; tensor var_5011 = mul(x = var_5009_1, y = const_119_promoted)[name = string("op_5011")]; int32 var_5013 = const()[name = string("op_5013"), val = int32(-1)]; bool var_5014_interleave_0 = const()[name = string("op_5014_interleave_0"), val = bool(false)]; tensor var_5014 = concat(axis = var_5013, interleave = var_5014_interleave_0, values = (var_5011, var_5009_0))[name = string("op_5014")]; tensor var_5015 = mul(x = var_5014, y = sin_1)[name = string("op_5015")]; tensor k_53 = add(x = var_5008, y = var_5015)[name = string("k_53")]; tensor var_5020_begin_0 = const()[name = string("op_5020_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_5020_end_0 = const()[name = string("op_5020_end_0"), val = tensor([9, 1, 2048, 256])]; tensor var_5020_end_mask_0 = const()[name = string("op_5020_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5020_squeeze_mask_0 = const()[name = string("op_5020_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_5020_cast_fp16 = slice_by_index(begin = var_5020_begin_0, end = var_5020_end_0, end_mask = var_5020_end_mask_0, squeeze_mask = var_5020_squeeze_mask_0, x = coreml_update_state_51)[name = string("op_5020_cast_fp16")]; tensor K_cache_17_axes_0 = const()[name = string("K_cache_17_axes_0"), val = tensor([0])]; tensor K_cache_17_cast_fp16 = expand_dims(axes = K_cache_17_axes_0, x = var_5020_cast_fp16)[name = string("K_cache_17_cast_fp16")]; tensor var_5025_begin_0 = const()[name = string("op_5025_begin_0"), val = tensor([26, 0, 0, 0])]; tensor var_5025_end_0 = const()[name = string("op_5025_end_0"), val = tensor([27, 1, 2048, 256])]; tensor var_5025_end_mask_0 = const()[name = string("op_5025_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5025_squeeze_mask_0 = const()[name = string("op_5025_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_5025_cast_fp16 = slice_by_index(begin = var_5025_begin_0, end = var_5025_end_0, end_mask = var_5025_end_mask_0, squeeze_mask = var_5025_squeeze_mask_0, x = coreml_update_state_51)[name = string("op_5025_cast_fp16")]; tensor V_cache_17_axes_0 = const()[name = string("V_cache_17_axes_0"), val = tensor([0])]; tensor V_cache_17_cast_fp16 = expand_dims(axes = V_cache_17_axes_0, x = var_5025_cast_fp16)[name = string("V_cache_17_cast_fp16")]; tensor k_broadcast_17_reps_0 = const()[name = string("k_broadcast_17_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_17 = tile(reps = k_broadcast_17_reps_0, x = k_53)[name = string("k_broadcast_17")]; tensor v_broadcast_17_reps_0 = const()[name = string("v_broadcast_17_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_17 = transpose(perm = var_4996, x = var_4991)[name = string("transpose_88")]; tensor v_broadcast_17 = tile(reps = v_broadcast_17_reps_0, x = v_17)[name = string("v_broadcast_17")]; tensor var_5033_cast_fp16 = mul(x = K_cache_17_cast_fp16, y = var_1120_cast_fp16)[name = string("op_5033_cast_fp16")]; tensor var_5034_cast_fp16 = mul(x = k_broadcast_17, y = update_mask)[name = string("op_5034_cast_fp16")]; tensor K_new_17_cast_fp16 = add(x = var_5033_cast_fp16, y = var_5034_cast_fp16)[name = string("K_new_17_cast_fp16")]; tensor var_5040_cast_fp16 = mul(x = V_cache_17_cast_fp16, y = var_1120_cast_fp16)[name = string("op_5040_cast_fp16")]; tensor var_5041_cast_fp16 = mul(x = v_broadcast_17, y = update_mask)[name = string("op_5041_cast_fp16")]; tensor V_new_17_cast_fp16 = add(x = var_5040_cast_fp16, y = var_5041_cast_fp16)[name = string("V_new_17_cast_fp16")]; tensor var_5045_axes_0 = const()[name = string("op_5045_axes_0"), val = tensor([0])]; tensor var_5045_cast_fp16 = squeeze(axes = var_5045_axes_0, x = K_new_17_cast_fp16)[name = string("op_5045_cast_fp16")]; tensor concat_32 = const()[name = string("concat_32"), val = tensor([8, 0, 0, 0])]; tensor concat_33 = const()[name = string("concat_33"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_32, begin_mask = kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_33, end_mask = kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_17_stride_0, update = var_5045_cast_fp16, x = coreml_update_state_51)[name = string("kv_cache_0_internal_tensor_assign_17_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_17_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_52_write_state")]; tensor coreml_update_state_52 = read_state(input = kv_cache_0)[name = string("coreml_update_state_52")]; tensor var_5052_axes_0 = const()[name = string("op_5052_axes_0"), val = tensor([0])]; tensor var_5052_cast_fp16 = squeeze(axes = var_5052_axes_0, x = V_new_17_cast_fp16)[name = string("op_5052_cast_fp16")]; tensor concat_34 = const()[name = string("concat_34"), val = tensor([26, 0, 0, 0])]; tensor concat_35 = const()[name = string("concat_35"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_34, begin_mask = kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_35, end_mask = kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_18_stride_0, update = var_5052_cast_fp16, x = coreml_update_state_52)[name = string("kv_cache_0_internal_tensor_assign_18_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_18_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_53_write_state")]; tensor coreml_update_state_53 = read_state(input = kv_cache_0)[name = string("coreml_update_state_53")]; tensor hidden_states_67_axes_0 = const()[name = string("hidden_states_67_axes_0"), val = tensor([2])]; tensor hidden_states_67_cast_fp16 = expand_dims(axes = hidden_states_67_axes_0, x = K_new_17_cast_fp16)[name = string("hidden_states_67_cast_fp16")]; tensor var_5065 = const()[name = string("op_5065"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_69_cast_fp16 = tile(reps = var_5065, x = hidden_states_67_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; tensor var_5071 = const()[name = string("op_5071"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_17_cast_fp16 = reshape(shape = var_5071, x = hidden_states_69_cast_fp16)[name = string("K_expanded_17_cast_fp16")]; tensor hidden_states_71_axes_0 = const()[name = string("hidden_states_71_axes_0"), val = tensor([2])]; tensor hidden_states_71_cast_fp16 = expand_dims(axes = hidden_states_71_axes_0, x = V_new_17_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor var_5080 = const()[name = string("op_5080"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_73_cast_fp16 = tile(reps = var_5080, x = hidden_states_71_cast_fp16)[name = string("hidden_states_73_cast_fp16")]; tensor var_5086 = const()[name = string("op_5086"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_17_cast_fp16 = reshape(shape = var_5086, x = hidden_states_73_cast_fp16)[name = string("V_expanded_17_cast_fp16")]; bool var_5101_transpose_x_1 = const()[name = string("op_5101_transpose_x_1"), val = bool(false)]; bool var_5101_transpose_y_1 = const()[name = string("op_5101_transpose_y_1"), val = bool(true)]; tensor var_5101_cast_fp16 = matmul(transpose_x = var_5101_transpose_x_1, transpose_y = var_5101_transpose_y_1, x = q_53, y = K_expanded_17_cast_fp16)[name = string("op_5101_cast_fp16")]; fp16 var_5102_to_fp16 = const()[name = string("op_5102_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_49_cast_fp16 = mul(x = var_5101_cast_fp16, y = var_5102_to_fp16)[name = string("attn_weights_49_cast_fp16")]; tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = causal_mask)[name = string("attn_weights_51_cast_fp16")]; int32 var_5111 = const()[name = string("op_5111"), val = int32(-1)]; tensor var_5113_cast_fp16 = softmax(axis = var_5111, x = attn_weights_51_cast_fp16)[name = string("op_5113_cast_fp16")]; bool var_5129_transpose_x_0 = const()[name = string("op_5129_transpose_x_0"), val = bool(false)]; bool var_5129_transpose_y_0 = const()[name = string("op_5129_transpose_y_0"), val = bool(false)]; tensor var_5129_cast_fp16 = matmul(transpose_x = var_5129_transpose_x_0, transpose_y = var_5129_transpose_y_0, x = var_5113_cast_fp16, y = V_expanded_17_cast_fp16)[name = string("op_5129_cast_fp16")]; tensor var_5139 = const()[name = string("op_5139"), val = tensor([0, 2, 1, 3])]; tensor var_5146 = const()[name = string("op_5146"), val = tensor([1, 1, -1])]; tensor var_5140 = transpose(perm = var_5139, x = var_5129_cast_fp16)[name = string("transpose_87")]; tensor attn_output_51 = reshape(shape = var_5146, x = var_5140)[name = string("attn_output_51")]; tensor var_5151 = const()[name = string("op_5151"), val = tensor([0, 2, 1])]; tensor squeeze_8 = const()[name = string("squeeze_8"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531428416)))]; string var_5167_pad_type_0 = const()[name = string("op_5167_pad_type_0"), val = string("valid")]; int32 var_5167_groups_0 = const()[name = string("op_5167_groups_0"), val = int32(1)]; tensor var_5167_strides_0 = const()[name = string("op_5167_strides_0"), val = tensor([1])]; tensor var_5167_pad_0 = const()[name = string("op_5167_pad_0"), val = tensor([0, 0])]; tensor var_5167_dilations_0 = const()[name = string("op_5167_dilations_0"), val = tensor([1])]; tensor var_5152 = transpose(perm = var_5151, x = attn_output_51)[name = string("transpose_86")]; tensor var_5167 = conv(dilations = var_5167_dilations_0, groups = var_5167_groups_0, pad = var_5167_pad_0, pad_type = var_5167_pad_type_0, strides = var_5167_strides_0, weight = squeeze_8, x = var_5152)[name = string("op_5167")]; tensor var_5171 = const()[name = string("op_5171"), val = tensor([0, 2, 1])]; int32 var_5178 = const()[name = string("op_5178"), val = int32(-1)]; fp16 const_120_promoted_to_fp16 = const()[name = string("const_120_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_137 = transpose(perm = var_5171, x = var_5167)[name = string("transpose_85")]; tensor var_5184_cast_fp16 = mul(x = x_137, y = const_120_promoted_to_fp16)[name = string("op_5184_cast_fp16")]; bool input_171_interleave_0 = const()[name = string("input_171_interleave_0"), val = bool(false)]; tensor input_171_cast_fp16 = concat(axis = var_5178, interleave = input_171_interleave_0, values = (x_137, var_5184_cast_fp16))[name = string("input_171_cast_fp16")]; tensor normed_239_axes_0 = const()[name = string("normed_239_axes_0"), val = tensor([-1])]; fp16 var_5176_to_fp16 = const()[name = string("op_5176_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_239_cast_fp16 = layer_norm(axes = normed_239_axes_0, epsilon = var_5176_to_fp16, x = input_171_cast_fp16)[name = string("normed_239_cast_fp16")]; tensor var_5189_split_sizes_0 = const()[name = string("op_5189_split_sizes_0"), val = tensor([640, 640])]; int32 var_5189_axis_0 = const()[name = string("op_5189_axis_0"), val = int32(-1)]; tensor var_5189_cast_fp16_0, tensor var_5189_cast_fp16_1 = split(axis = var_5189_axis_0, split_sizes = var_5189_split_sizes_0, x = normed_239_cast_fp16)[name = string("op_5189_cast_fp16")]; tensor var_5193_to_fp16 = const()[name = string("op_5193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532739200)))]; tensor out_103_cast_fp16 = mul(x = var_5189_cast_fp16_0, y = var_5193_to_fp16)[name = string("out_103_cast_fp16")]; tensor x_139_cast_fp16 = add(x = x_129_cast_fp16, y = out_103_cast_fp16)[name = string("x_139_cast_fp16")]; int32 var_5207 = const()[name = string("op_5207"), val = int32(-1)]; fp16 const_122_promoted_to_fp16 = const()[name = string("const_122_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5213_cast_fp16 = mul(x = x_139_cast_fp16, y = const_122_promoted_to_fp16)[name = string("op_5213_cast_fp16")]; bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; tensor input_173_cast_fp16 = concat(axis = var_5207, interleave = input_173_interleave_0, values = (x_139_cast_fp16, var_5213_cast_fp16))[name = string("input_173_cast_fp16")]; tensor normed_243_axes_0 = const()[name = string("normed_243_axes_0"), val = tensor([-1])]; fp16 var_5205_to_fp16 = const()[name = string("op_5205_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_243_cast_fp16 = layer_norm(axes = normed_243_axes_0, epsilon = var_5205_to_fp16, x = input_173_cast_fp16)[name = string("normed_243_cast_fp16")]; tensor var_5218_split_sizes_0 = const()[name = string("op_5218_split_sizes_0"), val = tensor([640, 640])]; int32 var_5218_axis_0 = const()[name = string("op_5218_axis_0"), val = int32(-1)]; tensor var_5218_cast_fp16_0, tensor var_5218_cast_fp16_1 = split(axis = var_5218_axis_0, split_sizes = var_5218_split_sizes_0, x = normed_243_cast_fp16)[name = string("op_5218_cast_fp16")]; tensor var_5222_to_fp16 = const()[name = string("op_5222_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532740544)))]; tensor out_105_cast_fp16 = mul(x = var_5218_cast_fp16_0, y = var_5222_to_fp16)[name = string("out_105_cast_fp16")]; tensor var_5236 = const()[name = string("op_5236"), val = tensor([0, 2, 1])]; tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; tensor var_5237 = transpose(perm = var_5236, x = out_105_cast_fp16)[name = string("transpose_84")]; tensor input_175 = expand_dims(axes = input_175_axes_0, x = var_5237)[name = string("input_175")]; string gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")]; tensor gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor([1, 1])]; tensor gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor([1, 1])]; int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)]; tensor gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = layers_8_mlp_gate_proj_weight, x = input_175)[name = string("gate_33")]; string up_17_pad_type_0 = const()[name = string("up_17_pad_type_0"), val = string("valid")]; tensor up_17_strides_0 = const()[name = string("up_17_strides_0"), val = tensor([1, 1])]; tensor up_17_pad_0 = const()[name = string("up_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_17_dilations_0 = const()[name = string("up_17_dilations_0"), val = tensor([1, 1])]; int32 up_17_groups_0 = const()[name = string("up_17_groups_0"), val = int32(1)]; tensor up_17 = conv(dilations = up_17_dilations_0, groups = up_17_groups_0, pad = up_17_pad_0, pad_type = up_17_pad_type_0, strides = up_17_strides_0, weight = layers_8_mlp_up_proj_weight, x = input_175)[name = string("up_17")]; string gate_35_mode_0 = const()[name = string("gate_35_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_35 = gelu(mode = gate_35_mode_0, x = gate_33)[name = string("gate_35")]; tensor input_177 = mul(x = gate_35, y = up_17)[name = string("input_177")]; string mlp_out_33_pad_type_0 = const()[name = string("mlp_out_33_pad_type_0"), val = string("valid")]; tensor mlp_out_33_strides_0 = const()[name = string("mlp_out_33_strides_0"), val = tensor([1, 1])]; tensor mlp_out_33_pad_0 = const()[name = string("mlp_out_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_33_dilations_0 = const()[name = string("mlp_out_33_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_33_groups_0 = const()[name = string("mlp_out_33_groups_0"), val = int32(1)]; tensor mlp_out_33 = conv(dilations = mlp_out_33_dilations_0, groups = mlp_out_33_groups_0, pad = mlp_out_33_pad_0, pad_type = mlp_out_33_pad_type_0, strides = mlp_out_33_strides_0, weight = layers_8_mlp_down_proj_weight, x = input_177)[name = string("mlp_out_33")]; tensor var_5277_axes_0 = const()[name = string("op_5277_axes_0"), val = tensor([2])]; tensor var_5277 = squeeze(axes = var_5277_axes_0, x = mlp_out_33)[name = string("op_5277")]; tensor var_5281 = const()[name = string("op_5281"), val = tensor([0, 2, 1])]; int32 var_5288 = const()[name = string("op_5288"), val = int32(-1)]; fp16 const_124_promoted_to_fp16 = const()[name = string("const_124_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_143 = transpose(perm = var_5281, x = var_5277)[name = string("transpose_83")]; tensor var_5294_cast_fp16 = mul(x = x_143, y = const_124_promoted_to_fp16)[name = string("op_5294_cast_fp16")]; bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; tensor input_179_cast_fp16 = concat(axis = var_5288, interleave = input_179_interleave_0, values = (x_143, var_5294_cast_fp16))[name = string("input_179_cast_fp16")]; tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; fp16 var_5286_to_fp16 = const()[name = string("op_5286_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_5286_to_fp16, x = input_179_cast_fp16)[name = string("normed_249_cast_fp16")]; tensor var_5299_split_sizes_0 = const()[name = string("op_5299_split_sizes_0"), val = tensor([640, 640])]; int32 var_5299_axis_0 = const()[name = string("op_5299_axis_0"), val = int32(-1)]; tensor var_5299_cast_fp16_0, tensor var_5299_cast_fp16_1 = split(axis = var_5299_axis_0, split_sizes = var_5299_split_sizes_0, x = normed_249_cast_fp16)[name = string("op_5299_cast_fp16")]; tensor var_5303_to_fp16 = const()[name = string("op_5303_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532741888)))]; tensor out_107_cast_fp16 = mul(x = var_5299_cast_fp16_0, y = var_5303_to_fp16)[name = string("out_107_cast_fp16")]; tensor x_145_cast_fp16 = add(x = x_139_cast_fp16, y = out_107_cast_fp16)[name = string("x_145_cast_fp16")]; int32 var_5317 = const()[name = string("op_5317"), val = int32(-1)]; fp16 const_126_promoted_to_fp16 = const()[name = string("const_126_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5323_cast_fp16 = mul(x = x_145_cast_fp16, y = const_126_promoted_to_fp16)[name = string("op_5323_cast_fp16")]; bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; tensor input_181_cast_fp16 = concat(axis = var_5317, interleave = input_181_interleave_0, values = (x_145_cast_fp16, var_5323_cast_fp16))[name = string("input_181_cast_fp16")]; tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; fp16 var_5315_to_fp16 = const()[name = string("op_5315_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_5315_to_fp16, x = input_181_cast_fp16)[name = string("normed_253_cast_fp16")]; tensor var_5328_split_sizes_0 = const()[name = string("op_5328_split_sizes_0"), val = tensor([640, 640])]; int32 var_5328_axis_0 = const()[name = string("op_5328_axis_0"), val = int32(-1)]; tensor var_5328_cast_fp16_0, tensor var_5328_cast_fp16_1 = split(axis = var_5328_axis_0, split_sizes = var_5328_split_sizes_0, x = normed_253_cast_fp16)[name = string("op_5328_cast_fp16")]; tensor var_5332_to_fp16 = const()[name = string("op_5332_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532743232)))]; tensor out_109_cast_fp16 = mul(x = var_5328_cast_fp16_0, y = var_5332_to_fp16)[name = string("out_109_cast_fp16")]; tensor var_5346 = const()[name = string("op_5346"), val = tensor([0, 2, 1])]; tensor input_183_axes_0 = const()[name = string("input_183_axes_0"), val = tensor([2])]; tensor var_5347 = transpose(perm = var_5346, x = out_109_cast_fp16)[name = string("transpose_82")]; tensor input_183 = expand_dims(axes = input_183_axes_0, x = var_5347)[name = string("input_183")]; string var_5360_pad_type_0 = const()[name = string("op_5360_pad_type_0"), val = string("valid")]; tensor var_5360_strides_0 = const()[name = string("op_5360_strides_0"), val = tensor([1, 1])]; tensor var_5360_pad_0 = const()[name = string("op_5360_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5360_dilations_0 = const()[name = string("op_5360_dilations_0"), val = tensor([1, 1])]; int32 var_5360_groups_0 = const()[name = string("op_5360_groups_0"), val = int32(1)]; tensor var_5360 = conv(dilations = var_5360_dilations_0, groups = var_5360_groups_0, pad = var_5360_pad_0, pad_type = var_5360_pad_type_0, strides = var_5360_strides_0, weight = layers_9_self_attn_q_proj_weight, x = input_183)[name = string("op_5360")]; tensor var_5365 = const()[name = string("op_5365"), val = tensor([1, 4, 256, 1])]; tensor var_5366 = reshape(shape = var_5365, x = var_5360)[name = string("op_5366")]; tensor var_5371 = const()[name = string("op_5371"), val = tensor([0, 1, 3, 2])]; tensor var_5376 = const()[name = string("op_5376"), val = tensor([1, 4, 256])]; tensor q_55 = transpose(perm = var_5371, x = var_5366)[name = string("transpose_81")]; tensor x_149 = reshape(shape = var_5376, x = q_55)[name = string("x_149")]; int32 var_5383 = const()[name = string("op_5383"), val = int32(-1)]; fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5389_cast_fp16 = mul(x = x_149, y = const_128_promoted_to_fp16)[name = string("op_5389_cast_fp16")]; bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; tensor input_185_cast_fp16 = concat(axis = var_5383, interleave = input_185_interleave_0, values = (x_149, var_5389_cast_fp16))[name = string("input_185_cast_fp16")]; tensor normed_259_axes_0 = const()[name = string("normed_259_axes_0"), val = tensor([-1])]; fp16 var_5381_to_fp16 = const()[name = string("op_5381_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_259_cast_fp16 = layer_norm(axes = normed_259_axes_0, epsilon = var_5381_to_fp16, x = input_185_cast_fp16)[name = string("normed_259_cast_fp16")]; tensor var_5394_split_sizes_0 = const()[name = string("op_5394_split_sizes_0"), val = tensor([256, 256])]; int32 var_5394_axis_0 = const()[name = string("op_5394_axis_0"), val = int32(-1)]; tensor var_5394_cast_fp16_0, tensor var_5394_cast_fp16_1 = split(axis = var_5394_axis_0, split_sizes = var_5394_split_sizes_0, x = normed_259_cast_fp16)[name = string("op_5394_cast_fp16")]; tensor var_5398_to_fp16 = const()[name = string("op_5398_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532744576)))]; tensor out_111_cast_fp16 = mul(x = var_5394_cast_fp16_0, y = var_5398_to_fp16)[name = string("out_111_cast_fp16")]; tensor var_5405 = const()[name = string("op_5405"), val = tensor([1, 4, 1, 256])]; tensor q_57 = reshape(shape = var_5405, x = out_111_cast_fp16)[name = string("q_57")]; string var_5417_pad_type_0 = const()[name = string("op_5417_pad_type_0"), val = string("valid")]; tensor var_5417_strides_0 = const()[name = string("op_5417_strides_0"), val = tensor([1, 1])]; tensor var_5417_pad_0 = const()[name = string("op_5417_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5417_dilations_0 = const()[name = string("op_5417_dilations_0"), val = tensor([1, 1])]; int32 var_5417_groups_0 = const()[name = string("op_5417_groups_0"), val = int32(1)]; tensor var_5417 = conv(dilations = var_5417_dilations_0, groups = var_5417_groups_0, pad = var_5417_pad_0, pad_type = var_5417_pad_type_0, strides = var_5417_strides_0, weight = layers_9_self_attn_k_proj_weight, x = input_183)[name = string("op_5417")]; tensor var_5422 = const()[name = string("op_5422"), val = tensor([1, 1, 256, 1])]; tensor var_5423 = reshape(shape = var_5422, x = var_5417)[name = string("op_5423")]; tensor var_5428 = const()[name = string("op_5428"), val = tensor([0, 1, 3, 2])]; tensor var_5433 = const()[name = string("op_5433"), val = tensor([1, 1, 256])]; tensor k_55 = transpose(perm = var_5428, x = var_5423)[name = string("transpose_80")]; tensor x_151 = reshape(shape = var_5433, x = k_55)[name = string("x_151")]; int32 var_5440 = const()[name = string("op_5440"), val = int32(-1)]; fp16 const_130_promoted_to_fp16 = const()[name = string("const_130_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5446_cast_fp16 = mul(x = x_151, y = const_130_promoted_to_fp16)[name = string("op_5446_cast_fp16")]; bool input_187_interleave_0 = const()[name = string("input_187_interleave_0"), val = bool(false)]; tensor input_187_cast_fp16 = concat(axis = var_5440, interleave = input_187_interleave_0, values = (x_151, var_5446_cast_fp16))[name = string("input_187_cast_fp16")]; tensor normed_263_axes_0 = const()[name = string("normed_263_axes_0"), val = tensor([-1])]; fp16 var_5438_to_fp16 = const()[name = string("op_5438_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_263_cast_fp16 = layer_norm(axes = normed_263_axes_0, epsilon = var_5438_to_fp16, x = input_187_cast_fp16)[name = string("normed_263_cast_fp16")]; tensor var_5451_split_sizes_0 = const()[name = string("op_5451_split_sizes_0"), val = tensor([256, 256])]; int32 var_5451_axis_0 = const()[name = string("op_5451_axis_0"), val = int32(-1)]; tensor var_5451_cast_fp16_0, tensor var_5451_cast_fp16_1 = split(axis = var_5451_axis_0, split_sizes = var_5451_split_sizes_0, x = normed_263_cast_fp16)[name = string("op_5451_cast_fp16")]; tensor var_5455_to_fp16 = const()[name = string("op_5455_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532745152)))]; tensor out_113_cast_fp16 = mul(x = var_5451_cast_fp16_0, y = var_5455_to_fp16)[name = string("out_113_cast_fp16")]; tensor var_5462 = const()[name = string("op_5462"), val = tensor([1, 1, 1, 256])]; tensor k_57 = reshape(shape = var_5462, x = out_113_cast_fp16)[name = string("k_57")]; string var_5474_pad_type_0 = const()[name = string("op_5474_pad_type_0"), val = string("valid")]; tensor var_5474_strides_0 = const()[name = string("op_5474_strides_0"), val = tensor([1, 1])]; tensor var_5474_pad_0 = const()[name = string("op_5474_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5474_dilations_0 = const()[name = string("op_5474_dilations_0"), val = tensor([1, 1])]; int32 var_5474_groups_0 = const()[name = string("op_5474_groups_0"), val = int32(1)]; tensor var_5474 = conv(dilations = var_5474_dilations_0, groups = var_5474_groups_0, pad = var_5474_pad_0, pad_type = var_5474_pad_type_0, strides = var_5474_strides_0, weight = layers_9_self_attn_v_proj_weight, x = input_183)[name = string("op_5474")]; tensor var_5479 = const()[name = string("op_5479"), val = tensor([1, 1, 256, 1])]; tensor var_5480 = reshape(shape = var_5479, x = var_5474)[name = string("op_5480")]; tensor var_5485 = const()[name = string("op_5485"), val = tensor([0, 1, 3, 2])]; tensor var_5487 = mul(x = q_57, y = cos_1)[name = string("op_5487")]; tensor var_5488_split_sizes_0 = const()[name = string("op_5488_split_sizes_0"), val = tensor([128, 128])]; int32 var_5488_axis_0 = const()[name = string("op_5488_axis_0"), val = int32(-1)]; tensor var_5488_0, tensor var_5488_1 = split(axis = var_5488_axis_0, split_sizes = var_5488_split_sizes_0, x = q_57)[name = string("op_5488")]; fp16 const_132_promoted = const()[name = string("const_132_promoted"), val = fp16(-0x1p+0)]; tensor var_5490 = mul(x = var_5488_1, y = const_132_promoted)[name = string("op_5490")]; int32 var_5492 = const()[name = string("op_5492"), val = int32(-1)]; bool var_5493_interleave_0 = const()[name = string("op_5493_interleave_0"), val = bool(false)]; tensor var_5493 = concat(axis = var_5492, interleave = var_5493_interleave_0, values = (var_5490, var_5488_0))[name = string("op_5493")]; tensor var_5494 = mul(x = var_5493, y = sin_1)[name = string("op_5494")]; tensor q_59 = add(x = var_5487, y = var_5494)[name = string("q_59")]; tensor var_5497 = mul(x = k_57, y = cos_1)[name = string("op_5497")]; tensor var_5498_split_sizes_0 = const()[name = string("op_5498_split_sizes_0"), val = tensor([128, 128])]; int32 var_5498_axis_0 = const()[name = string("op_5498_axis_0"), val = int32(-1)]; tensor var_5498_0, tensor var_5498_1 = split(axis = var_5498_axis_0, split_sizes = var_5498_split_sizes_0, x = k_57)[name = string("op_5498")]; fp16 const_133_promoted = const()[name = string("const_133_promoted"), val = fp16(-0x1p+0)]; tensor var_5500 = mul(x = var_5498_1, y = const_133_promoted)[name = string("op_5500")]; int32 var_5502 = const()[name = string("op_5502"), val = int32(-1)]; bool var_5503_interleave_0 = const()[name = string("op_5503_interleave_0"), val = bool(false)]; tensor var_5503 = concat(axis = var_5502, interleave = var_5503_interleave_0, values = (var_5500, var_5498_0))[name = string("op_5503")]; tensor var_5504 = mul(x = var_5503, y = sin_1)[name = string("op_5504")]; tensor k_59 = add(x = var_5497, y = var_5504)[name = string("k_59")]; tensor var_5509_begin_0 = const()[name = string("op_5509_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_5509_end_0 = const()[name = string("op_5509_end_0"), val = tensor([10, 1, 2048, 256])]; tensor var_5509_end_mask_0 = const()[name = string("op_5509_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5509_squeeze_mask_0 = const()[name = string("op_5509_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_5509_cast_fp16 = slice_by_index(begin = var_5509_begin_0, end = var_5509_end_0, end_mask = var_5509_end_mask_0, squeeze_mask = var_5509_squeeze_mask_0, x = coreml_update_state_53)[name = string("op_5509_cast_fp16")]; tensor K_cache_19_axes_0 = const()[name = string("K_cache_19_axes_0"), val = tensor([0])]; tensor K_cache_19_cast_fp16 = expand_dims(axes = K_cache_19_axes_0, x = var_5509_cast_fp16)[name = string("K_cache_19_cast_fp16")]; tensor var_5514_begin_0 = const()[name = string("op_5514_begin_0"), val = tensor([27, 0, 0, 0])]; tensor var_5514_end_0 = const()[name = string("op_5514_end_0"), val = tensor([28, 1, 2048, 256])]; tensor var_5514_end_mask_0 = const()[name = string("op_5514_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5514_squeeze_mask_0 = const()[name = string("op_5514_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_5514_cast_fp16 = slice_by_index(begin = var_5514_begin_0, end = var_5514_end_0, end_mask = var_5514_end_mask_0, squeeze_mask = var_5514_squeeze_mask_0, x = coreml_update_state_53)[name = string("op_5514_cast_fp16")]; tensor V_cache_19_axes_0 = const()[name = string("V_cache_19_axes_0"), val = tensor([0])]; tensor V_cache_19_cast_fp16 = expand_dims(axes = V_cache_19_axes_0, x = var_5514_cast_fp16)[name = string("V_cache_19_cast_fp16")]; tensor k_broadcast_19_reps_0 = const()[name = string("k_broadcast_19_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_19 = tile(reps = k_broadcast_19_reps_0, x = k_59)[name = string("k_broadcast_19")]; tensor v_broadcast_19_reps_0 = const()[name = string("v_broadcast_19_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_19 = transpose(perm = var_5485, x = var_5480)[name = string("transpose_79")]; tensor v_broadcast_19 = tile(reps = v_broadcast_19_reps_0, x = v_19)[name = string("v_broadcast_19")]; tensor var_5522_cast_fp16 = mul(x = K_cache_19_cast_fp16, y = var_1120_cast_fp16)[name = string("op_5522_cast_fp16")]; tensor var_5523_cast_fp16 = mul(x = k_broadcast_19, y = update_mask)[name = string("op_5523_cast_fp16")]; tensor K_new_19_cast_fp16 = add(x = var_5522_cast_fp16, y = var_5523_cast_fp16)[name = string("K_new_19_cast_fp16")]; tensor var_5529_cast_fp16 = mul(x = V_cache_19_cast_fp16, y = var_1120_cast_fp16)[name = string("op_5529_cast_fp16")]; tensor var_5530_cast_fp16 = mul(x = v_broadcast_19, y = update_mask)[name = string("op_5530_cast_fp16")]; tensor V_new_19_cast_fp16 = add(x = var_5529_cast_fp16, y = var_5530_cast_fp16)[name = string("V_new_19_cast_fp16")]; tensor var_5534_axes_0 = const()[name = string("op_5534_axes_0"), val = tensor([0])]; tensor var_5534_cast_fp16 = squeeze(axes = var_5534_axes_0, x = K_new_19_cast_fp16)[name = string("op_5534_cast_fp16")]; tensor concat_36 = const()[name = string("concat_36"), val = tensor([9, 0, 0, 0])]; tensor concat_37 = const()[name = string("concat_37"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_19_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_19_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_19_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_36, begin_mask = kv_cache_0_internal_tensor_assign_19_begin_mask_0, end = concat_37, end_mask = kv_cache_0_internal_tensor_assign_19_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_19_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_19_stride_0, update = var_5534_cast_fp16, x = coreml_update_state_53)[name = string("kv_cache_0_internal_tensor_assign_19_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_19_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_54_write_state")]; tensor coreml_update_state_54 = read_state(input = kv_cache_0)[name = string("coreml_update_state_54")]; tensor var_5541_axes_0 = const()[name = string("op_5541_axes_0"), val = tensor([0])]; tensor var_5541_cast_fp16 = squeeze(axes = var_5541_axes_0, x = V_new_19_cast_fp16)[name = string("op_5541_cast_fp16")]; tensor concat_38 = const()[name = string("concat_38"), val = tensor([27, 0, 0, 0])]; tensor concat_39 = const()[name = string("concat_39"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_20_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_20_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_20_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_38, begin_mask = kv_cache_0_internal_tensor_assign_20_begin_mask_0, end = concat_39, end_mask = kv_cache_0_internal_tensor_assign_20_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_20_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_20_stride_0, update = var_5541_cast_fp16, x = coreml_update_state_54)[name = string("kv_cache_0_internal_tensor_assign_20_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_20_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_55_write_state")]; tensor coreml_update_state_55 = read_state(input = kv_cache_0)[name = string("coreml_update_state_55")]; tensor hidden_states_75_axes_0 = const()[name = string("hidden_states_75_axes_0"), val = tensor([2])]; tensor hidden_states_75_cast_fp16 = expand_dims(axes = hidden_states_75_axes_0, x = K_new_19_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; tensor var_5554 = const()[name = string("op_5554"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_77_cast_fp16 = tile(reps = var_5554, x = hidden_states_75_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor var_5560 = const()[name = string("op_5560"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_19_cast_fp16 = reshape(shape = var_5560, x = hidden_states_77_cast_fp16)[name = string("K_expanded_19_cast_fp16")]; tensor hidden_states_79_axes_0 = const()[name = string("hidden_states_79_axes_0"), val = tensor([2])]; tensor hidden_states_79_cast_fp16 = expand_dims(axes = hidden_states_79_axes_0, x = V_new_19_cast_fp16)[name = string("hidden_states_79_cast_fp16")]; tensor var_5569 = const()[name = string("op_5569"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_81_cast_fp16 = tile(reps = var_5569, x = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; tensor var_5575 = const()[name = string("op_5575"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_19_cast_fp16 = reshape(shape = var_5575, x = hidden_states_81_cast_fp16)[name = string("V_expanded_19_cast_fp16")]; bool var_5590_transpose_x_1 = const()[name = string("op_5590_transpose_x_1"), val = bool(false)]; bool var_5590_transpose_y_1 = const()[name = string("op_5590_transpose_y_1"), val = bool(true)]; tensor var_5590_cast_fp16 = matmul(transpose_x = var_5590_transpose_x_1, transpose_y = var_5590_transpose_y_1, x = q_59, y = K_expanded_19_cast_fp16)[name = string("op_5590_cast_fp16")]; fp16 var_5591_to_fp16 = const()[name = string("op_5591_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_55_cast_fp16 = mul(x = var_5590_cast_fp16, y = var_5591_to_fp16)[name = string("attn_weights_55_cast_fp16")]; tensor attn_weights_57_cast_fp16 = add(x = attn_weights_55_cast_fp16, y = causal_mask)[name = string("attn_weights_57_cast_fp16")]; int32 var_5600 = const()[name = string("op_5600"), val = int32(-1)]; tensor var_5602_cast_fp16 = softmax(axis = var_5600, x = attn_weights_57_cast_fp16)[name = string("op_5602_cast_fp16")]; bool var_5618_transpose_x_0 = const()[name = string("op_5618_transpose_x_0"), val = bool(false)]; bool var_5618_transpose_y_0 = const()[name = string("op_5618_transpose_y_0"), val = bool(false)]; tensor var_5618_cast_fp16 = matmul(transpose_x = var_5618_transpose_x_0, transpose_y = var_5618_transpose_y_0, x = var_5602_cast_fp16, y = V_expanded_19_cast_fp16)[name = string("op_5618_cast_fp16")]; tensor var_5628 = const()[name = string("op_5628"), val = tensor([0, 2, 1, 3])]; tensor var_5635 = const()[name = string("op_5635"), val = tensor([1, 1, -1])]; tensor var_5629 = transpose(perm = var_5628, x = var_5618_cast_fp16)[name = string("transpose_78")]; tensor attn_output_57 = reshape(shape = var_5635, x = var_5629)[name = string("attn_output_57")]; tensor var_5640 = const()[name = string("op_5640"), val = tensor([0, 2, 1])]; tensor squeeze_9 = const()[name = string("squeeze_9"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532745728)))]; string var_5656_pad_type_0 = const()[name = string("op_5656_pad_type_0"), val = string("valid")]; int32 var_5656_groups_0 = const()[name = string("op_5656_groups_0"), val = int32(1)]; tensor var_5656_strides_0 = const()[name = string("op_5656_strides_0"), val = tensor([1])]; tensor var_5656_pad_0 = const()[name = string("op_5656_pad_0"), val = tensor([0, 0])]; tensor var_5656_dilations_0 = const()[name = string("op_5656_dilations_0"), val = tensor([1])]; tensor var_5641 = transpose(perm = var_5640, x = attn_output_57)[name = string("transpose_77")]; tensor var_5656 = conv(dilations = var_5656_dilations_0, groups = var_5656_groups_0, pad = var_5656_pad_0, pad_type = var_5656_pad_type_0, strides = var_5656_strides_0, weight = squeeze_9, x = var_5641)[name = string("op_5656")]; tensor var_5660 = const()[name = string("op_5660"), val = tensor([0, 2, 1])]; int32 var_5667 = const()[name = string("op_5667"), val = int32(-1)]; fp16 const_134_promoted_to_fp16 = const()[name = string("const_134_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_153 = transpose(perm = var_5660, x = var_5656)[name = string("transpose_76")]; tensor var_5673_cast_fp16 = mul(x = x_153, y = const_134_promoted_to_fp16)[name = string("op_5673_cast_fp16")]; bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; tensor input_191_cast_fp16 = concat(axis = var_5667, interleave = input_191_interleave_0, values = (x_153, var_5673_cast_fp16))[name = string("input_191_cast_fp16")]; tensor normed_267_axes_0 = const()[name = string("normed_267_axes_0"), val = tensor([-1])]; fp16 var_5665_to_fp16 = const()[name = string("op_5665_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_267_cast_fp16 = layer_norm(axes = normed_267_axes_0, epsilon = var_5665_to_fp16, x = input_191_cast_fp16)[name = string("normed_267_cast_fp16")]; tensor var_5678_split_sizes_0 = const()[name = string("op_5678_split_sizes_0"), val = tensor([640, 640])]; int32 var_5678_axis_0 = const()[name = string("op_5678_axis_0"), val = int32(-1)]; tensor var_5678_cast_fp16_0, tensor var_5678_cast_fp16_1 = split(axis = var_5678_axis_0, split_sizes = var_5678_split_sizes_0, x = normed_267_cast_fp16)[name = string("op_5678_cast_fp16")]; tensor var_5682_to_fp16 = const()[name = string("op_5682_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534056512)))]; tensor out_115_cast_fp16 = mul(x = var_5678_cast_fp16_0, y = var_5682_to_fp16)[name = string("out_115_cast_fp16")]; tensor x_155_cast_fp16 = add(x = x_145_cast_fp16, y = out_115_cast_fp16)[name = string("x_155_cast_fp16")]; int32 var_5696 = const()[name = string("op_5696"), val = int32(-1)]; fp16 const_136_promoted_to_fp16 = const()[name = string("const_136_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5702_cast_fp16 = mul(x = x_155_cast_fp16, y = const_136_promoted_to_fp16)[name = string("op_5702_cast_fp16")]; bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; tensor input_193_cast_fp16 = concat(axis = var_5696, interleave = input_193_interleave_0, values = (x_155_cast_fp16, var_5702_cast_fp16))[name = string("input_193_cast_fp16")]; tensor normed_271_axes_0 = const()[name = string("normed_271_axes_0"), val = tensor([-1])]; fp16 var_5694_to_fp16 = const()[name = string("op_5694_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_271_cast_fp16 = layer_norm(axes = normed_271_axes_0, epsilon = var_5694_to_fp16, x = input_193_cast_fp16)[name = string("normed_271_cast_fp16")]; tensor var_5707_split_sizes_0 = const()[name = string("op_5707_split_sizes_0"), val = tensor([640, 640])]; int32 var_5707_axis_0 = const()[name = string("op_5707_axis_0"), val = int32(-1)]; tensor var_5707_cast_fp16_0, tensor var_5707_cast_fp16_1 = split(axis = var_5707_axis_0, split_sizes = var_5707_split_sizes_0, x = normed_271_cast_fp16)[name = string("op_5707_cast_fp16")]; tensor var_5711_to_fp16 = const()[name = string("op_5711_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534057856)))]; tensor out_117_cast_fp16 = mul(x = var_5707_cast_fp16_0, y = var_5711_to_fp16)[name = string("out_117_cast_fp16")]; tensor var_5725 = const()[name = string("op_5725"), val = tensor([0, 2, 1])]; tensor input_195_axes_0 = const()[name = string("input_195_axes_0"), val = tensor([2])]; tensor var_5726 = transpose(perm = var_5725, x = out_117_cast_fp16)[name = string("transpose_75")]; tensor input_195 = expand_dims(axes = input_195_axes_0, x = var_5726)[name = string("input_195")]; string gate_37_pad_type_0 = const()[name = string("gate_37_pad_type_0"), val = string("valid")]; tensor gate_37_strides_0 = const()[name = string("gate_37_strides_0"), val = tensor([1, 1])]; tensor gate_37_pad_0 = const()[name = string("gate_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_37_dilations_0 = const()[name = string("gate_37_dilations_0"), val = tensor([1, 1])]; int32 gate_37_groups_0 = const()[name = string("gate_37_groups_0"), val = int32(1)]; tensor gate_37 = conv(dilations = gate_37_dilations_0, groups = gate_37_groups_0, pad = gate_37_pad_0, pad_type = gate_37_pad_type_0, strides = gate_37_strides_0, weight = layers_9_mlp_gate_proj_weight, x = input_195)[name = string("gate_37")]; string up_19_pad_type_0 = const()[name = string("up_19_pad_type_0"), val = string("valid")]; tensor up_19_strides_0 = const()[name = string("up_19_strides_0"), val = tensor([1, 1])]; tensor up_19_pad_0 = const()[name = string("up_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_19_dilations_0 = const()[name = string("up_19_dilations_0"), val = tensor([1, 1])]; int32 up_19_groups_0 = const()[name = string("up_19_groups_0"), val = int32(1)]; tensor up_19 = conv(dilations = up_19_dilations_0, groups = up_19_groups_0, pad = up_19_pad_0, pad_type = up_19_pad_type_0, strides = up_19_strides_0, weight = layers_9_mlp_up_proj_weight, x = input_195)[name = string("up_19")]; string gate_39_mode_0 = const()[name = string("gate_39_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_39 = gelu(mode = gate_39_mode_0, x = gate_37)[name = string("gate_39")]; tensor input_197 = mul(x = gate_39, y = up_19)[name = string("input_197")]; string mlp_out_37_pad_type_0 = const()[name = string("mlp_out_37_pad_type_0"), val = string("valid")]; tensor mlp_out_37_strides_0 = const()[name = string("mlp_out_37_strides_0"), val = tensor([1, 1])]; tensor mlp_out_37_pad_0 = const()[name = string("mlp_out_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_37_dilations_0 = const()[name = string("mlp_out_37_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_37_groups_0 = const()[name = string("mlp_out_37_groups_0"), val = int32(1)]; tensor mlp_out_37 = conv(dilations = mlp_out_37_dilations_0, groups = mlp_out_37_groups_0, pad = mlp_out_37_pad_0, pad_type = mlp_out_37_pad_type_0, strides = mlp_out_37_strides_0, weight = layers_9_mlp_down_proj_weight, x = input_197)[name = string("mlp_out_37")]; tensor var_5766_axes_0 = const()[name = string("op_5766_axes_0"), val = tensor([2])]; tensor var_5766 = squeeze(axes = var_5766_axes_0, x = mlp_out_37)[name = string("op_5766")]; tensor var_5770 = const()[name = string("op_5770"), val = tensor([0, 2, 1])]; int32 var_5777 = const()[name = string("op_5777"), val = int32(-1)]; fp16 const_138_promoted_to_fp16 = const()[name = string("const_138_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_159 = transpose(perm = var_5770, x = var_5766)[name = string("transpose_74")]; tensor var_5783_cast_fp16 = mul(x = x_159, y = const_138_promoted_to_fp16)[name = string("op_5783_cast_fp16")]; bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; tensor input_199_cast_fp16 = concat(axis = var_5777, interleave = input_199_interleave_0, values = (x_159, var_5783_cast_fp16))[name = string("input_199_cast_fp16")]; tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; fp16 var_5775_to_fp16 = const()[name = string("op_5775_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_5775_to_fp16, x = input_199_cast_fp16)[name = string("normed_277_cast_fp16")]; tensor var_5788_split_sizes_0 = const()[name = string("op_5788_split_sizes_0"), val = tensor([640, 640])]; int32 var_5788_axis_0 = const()[name = string("op_5788_axis_0"), val = int32(-1)]; tensor var_5788_cast_fp16_0, tensor var_5788_cast_fp16_1 = split(axis = var_5788_axis_0, split_sizes = var_5788_split_sizes_0, x = normed_277_cast_fp16)[name = string("op_5788_cast_fp16")]; tensor var_5792_to_fp16 = const()[name = string("op_5792_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534059200)))]; tensor out_119_cast_fp16 = mul(x = var_5788_cast_fp16_0, y = var_5792_to_fp16)[name = string("out_119_cast_fp16")]; tensor x_161_cast_fp16 = add(x = x_155_cast_fp16, y = out_119_cast_fp16)[name = string("x_161_cast_fp16")]; int32 var_5806 = const()[name = string("op_5806"), val = int32(-1)]; fp16 const_140_promoted_to_fp16 = const()[name = string("const_140_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5812_cast_fp16 = mul(x = x_161_cast_fp16, y = const_140_promoted_to_fp16)[name = string("op_5812_cast_fp16")]; bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; tensor input_201_cast_fp16 = concat(axis = var_5806, interleave = input_201_interleave_0, values = (x_161_cast_fp16, var_5812_cast_fp16))[name = string("input_201_cast_fp16")]; tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; fp16 var_5804_to_fp16 = const()[name = string("op_5804_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_5804_to_fp16, x = input_201_cast_fp16)[name = string("normed_281_cast_fp16")]; tensor var_5817_split_sizes_0 = const()[name = string("op_5817_split_sizes_0"), val = tensor([640, 640])]; int32 var_5817_axis_0 = const()[name = string("op_5817_axis_0"), val = int32(-1)]; tensor var_5817_cast_fp16_0, tensor var_5817_cast_fp16_1 = split(axis = var_5817_axis_0, split_sizes = var_5817_split_sizes_0, x = normed_281_cast_fp16)[name = string("op_5817_cast_fp16")]; tensor var_5821_to_fp16 = const()[name = string("op_5821_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534060544)))]; tensor out_121_cast_fp16 = mul(x = var_5817_cast_fp16_0, y = var_5821_to_fp16)[name = string("out_121_cast_fp16")]; tensor var_5835 = const()[name = string("op_5835"), val = tensor([0, 2, 1])]; tensor input_203_axes_0 = const()[name = string("input_203_axes_0"), val = tensor([2])]; tensor var_5836 = transpose(perm = var_5835, x = out_121_cast_fp16)[name = string("transpose_73")]; tensor input_203 = expand_dims(axes = input_203_axes_0, x = var_5836)[name = string("input_203")]; string var_5849_pad_type_0 = const()[name = string("op_5849_pad_type_0"), val = string("valid")]; tensor var_5849_strides_0 = const()[name = string("op_5849_strides_0"), val = tensor([1, 1])]; tensor var_5849_pad_0 = const()[name = string("op_5849_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5849_dilations_0 = const()[name = string("op_5849_dilations_0"), val = tensor([1, 1])]; int32 var_5849_groups_0 = const()[name = string("op_5849_groups_0"), val = int32(1)]; tensor var_5849 = conv(dilations = var_5849_dilations_0, groups = var_5849_groups_0, pad = var_5849_pad_0, pad_type = var_5849_pad_type_0, strides = var_5849_strides_0, weight = layers_10_self_attn_q_proj_weight, x = input_203)[name = string("op_5849")]; tensor var_5854 = const()[name = string("op_5854"), val = tensor([1, 4, 256, 1])]; tensor var_5855 = reshape(shape = var_5854, x = var_5849)[name = string("op_5855")]; tensor var_5860 = const()[name = string("op_5860"), val = tensor([0, 1, 3, 2])]; tensor var_5865 = const()[name = string("op_5865"), val = tensor([1, 4, 256])]; tensor q_61 = transpose(perm = var_5860, x = var_5855)[name = string("transpose_72")]; tensor x_165 = reshape(shape = var_5865, x = q_61)[name = string("x_165")]; int32 var_5872 = const()[name = string("op_5872"), val = int32(-1)]; fp16 const_142_promoted_to_fp16 = const()[name = string("const_142_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5878_cast_fp16 = mul(x = x_165, y = const_142_promoted_to_fp16)[name = string("op_5878_cast_fp16")]; bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; tensor input_205_cast_fp16 = concat(axis = var_5872, interleave = input_205_interleave_0, values = (x_165, var_5878_cast_fp16))[name = string("input_205_cast_fp16")]; tensor normed_287_axes_0 = const()[name = string("normed_287_axes_0"), val = tensor([-1])]; fp16 var_5870_to_fp16 = const()[name = string("op_5870_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_287_cast_fp16 = layer_norm(axes = normed_287_axes_0, epsilon = var_5870_to_fp16, x = input_205_cast_fp16)[name = string("normed_287_cast_fp16")]; tensor var_5883_split_sizes_0 = const()[name = string("op_5883_split_sizes_0"), val = tensor([256, 256])]; int32 var_5883_axis_0 = const()[name = string("op_5883_axis_0"), val = int32(-1)]; tensor var_5883_cast_fp16_0, tensor var_5883_cast_fp16_1 = split(axis = var_5883_axis_0, split_sizes = var_5883_split_sizes_0, x = normed_287_cast_fp16)[name = string("op_5883_cast_fp16")]; tensor var_5887_to_fp16 = const()[name = string("op_5887_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534061888)))]; tensor out_123_cast_fp16 = mul(x = var_5883_cast_fp16_0, y = var_5887_to_fp16)[name = string("out_123_cast_fp16")]; tensor var_5894 = const()[name = string("op_5894"), val = tensor([1, 4, 1, 256])]; tensor q_63 = reshape(shape = var_5894, x = out_123_cast_fp16)[name = string("q_63")]; string var_5906_pad_type_0 = const()[name = string("op_5906_pad_type_0"), val = string("valid")]; tensor var_5906_strides_0 = const()[name = string("op_5906_strides_0"), val = tensor([1, 1])]; tensor var_5906_pad_0 = const()[name = string("op_5906_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5906_dilations_0 = const()[name = string("op_5906_dilations_0"), val = tensor([1, 1])]; int32 var_5906_groups_0 = const()[name = string("op_5906_groups_0"), val = int32(1)]; tensor var_5906 = conv(dilations = var_5906_dilations_0, groups = var_5906_groups_0, pad = var_5906_pad_0, pad_type = var_5906_pad_type_0, strides = var_5906_strides_0, weight = layers_10_self_attn_k_proj_weight, x = input_203)[name = string("op_5906")]; tensor var_5911 = const()[name = string("op_5911"), val = tensor([1, 1, 256, 1])]; tensor var_5912 = reshape(shape = var_5911, x = var_5906)[name = string("op_5912")]; tensor var_5917 = const()[name = string("op_5917"), val = tensor([0, 1, 3, 2])]; tensor var_5922 = const()[name = string("op_5922"), val = tensor([1, 1, 256])]; tensor k_61 = transpose(perm = var_5917, x = var_5912)[name = string("transpose_71")]; tensor x_167 = reshape(shape = var_5922, x = k_61)[name = string("x_167")]; int32 var_5929 = const()[name = string("op_5929"), val = int32(-1)]; fp16 const_144_promoted_to_fp16 = const()[name = string("const_144_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5935_cast_fp16 = mul(x = x_167, y = const_144_promoted_to_fp16)[name = string("op_5935_cast_fp16")]; bool input_207_interleave_0 = const()[name = string("input_207_interleave_0"), val = bool(false)]; tensor input_207_cast_fp16 = concat(axis = var_5929, interleave = input_207_interleave_0, values = (x_167, var_5935_cast_fp16))[name = string("input_207_cast_fp16")]; tensor normed_291_axes_0 = const()[name = string("normed_291_axes_0"), val = tensor([-1])]; fp16 var_5927_to_fp16 = const()[name = string("op_5927_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_291_cast_fp16 = layer_norm(axes = normed_291_axes_0, epsilon = var_5927_to_fp16, x = input_207_cast_fp16)[name = string("normed_291_cast_fp16")]; tensor var_5940_split_sizes_0 = const()[name = string("op_5940_split_sizes_0"), val = tensor([256, 256])]; int32 var_5940_axis_0 = const()[name = string("op_5940_axis_0"), val = int32(-1)]; tensor var_5940_cast_fp16_0, tensor var_5940_cast_fp16_1 = split(axis = var_5940_axis_0, split_sizes = var_5940_split_sizes_0, x = normed_291_cast_fp16)[name = string("op_5940_cast_fp16")]; tensor var_5944_to_fp16 = const()[name = string("op_5944_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534062464)))]; tensor out_125_cast_fp16 = mul(x = var_5940_cast_fp16_0, y = var_5944_to_fp16)[name = string("out_125_cast_fp16")]; tensor var_5951 = const()[name = string("op_5951"), val = tensor([1, 1, 1, 256])]; tensor k_63 = reshape(shape = var_5951, x = out_125_cast_fp16)[name = string("k_63")]; string var_5963_pad_type_0 = const()[name = string("op_5963_pad_type_0"), val = string("valid")]; tensor var_5963_strides_0 = const()[name = string("op_5963_strides_0"), val = tensor([1, 1])]; tensor var_5963_pad_0 = const()[name = string("op_5963_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5963_dilations_0 = const()[name = string("op_5963_dilations_0"), val = tensor([1, 1])]; int32 var_5963_groups_0 = const()[name = string("op_5963_groups_0"), val = int32(1)]; tensor var_5963 = conv(dilations = var_5963_dilations_0, groups = var_5963_groups_0, pad = var_5963_pad_0, pad_type = var_5963_pad_type_0, strides = var_5963_strides_0, weight = layers_10_self_attn_v_proj_weight, x = input_203)[name = string("op_5963")]; tensor var_5968 = const()[name = string("op_5968"), val = tensor([1, 1, 256, 1])]; tensor var_5969 = reshape(shape = var_5968, x = var_5963)[name = string("op_5969")]; tensor var_5974 = const()[name = string("op_5974"), val = tensor([0, 1, 3, 2])]; tensor var_5976 = mul(x = q_63, y = cos_1)[name = string("op_5976")]; tensor var_5977_split_sizes_0 = const()[name = string("op_5977_split_sizes_0"), val = tensor([128, 128])]; int32 var_5977_axis_0 = const()[name = string("op_5977_axis_0"), val = int32(-1)]; tensor var_5977_0, tensor var_5977_1 = split(axis = var_5977_axis_0, split_sizes = var_5977_split_sizes_0, x = q_63)[name = string("op_5977")]; fp16 const_146_promoted = const()[name = string("const_146_promoted"), val = fp16(-0x1p+0)]; tensor var_5979 = mul(x = var_5977_1, y = const_146_promoted)[name = string("op_5979")]; int32 var_5981 = const()[name = string("op_5981"), val = int32(-1)]; bool var_5982_interleave_0 = const()[name = string("op_5982_interleave_0"), val = bool(false)]; tensor var_5982 = concat(axis = var_5981, interleave = var_5982_interleave_0, values = (var_5979, var_5977_0))[name = string("op_5982")]; tensor var_5983 = mul(x = var_5982, y = sin_1)[name = string("op_5983")]; tensor q_65 = add(x = var_5976, y = var_5983)[name = string("q_65")]; tensor var_5986 = mul(x = k_63, y = cos_1)[name = string("op_5986")]; tensor var_5987_split_sizes_0 = const()[name = string("op_5987_split_sizes_0"), val = tensor([128, 128])]; int32 var_5987_axis_0 = const()[name = string("op_5987_axis_0"), val = int32(-1)]; tensor var_5987_0, tensor var_5987_1 = split(axis = var_5987_axis_0, split_sizes = var_5987_split_sizes_0, x = k_63)[name = string("op_5987")]; fp16 const_147_promoted = const()[name = string("const_147_promoted"), val = fp16(-0x1p+0)]; tensor var_5989 = mul(x = var_5987_1, y = const_147_promoted)[name = string("op_5989")]; int32 var_5991 = const()[name = string("op_5991"), val = int32(-1)]; bool var_5992_interleave_0 = const()[name = string("op_5992_interleave_0"), val = bool(false)]; tensor var_5992 = concat(axis = var_5991, interleave = var_5992_interleave_0, values = (var_5989, var_5987_0))[name = string("op_5992")]; tensor var_5993 = mul(x = var_5992, y = sin_1)[name = string("op_5993")]; tensor k_65 = add(x = var_5986, y = var_5993)[name = string("k_65")]; tensor var_5998_begin_0 = const()[name = string("op_5998_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_5998_end_0 = const()[name = string("op_5998_end_0"), val = tensor([11, 1, 2048, 256])]; tensor var_5998_end_mask_0 = const()[name = string("op_5998_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5998_squeeze_mask_0 = const()[name = string("op_5998_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_5998_cast_fp16 = slice_by_index(begin = var_5998_begin_0, end = var_5998_end_0, end_mask = var_5998_end_mask_0, squeeze_mask = var_5998_squeeze_mask_0, x = coreml_update_state_55)[name = string("op_5998_cast_fp16")]; tensor K_cache_21_axes_0 = const()[name = string("K_cache_21_axes_0"), val = tensor([0])]; tensor K_cache_21_cast_fp16 = expand_dims(axes = K_cache_21_axes_0, x = var_5998_cast_fp16)[name = string("K_cache_21_cast_fp16")]; tensor var_6003_begin_0 = const()[name = string("op_6003_begin_0"), val = tensor([28, 0, 0, 0])]; tensor var_6003_end_0 = const()[name = string("op_6003_end_0"), val = tensor([29, 1, 2048, 256])]; tensor var_6003_end_mask_0 = const()[name = string("op_6003_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6003_squeeze_mask_0 = const()[name = string("op_6003_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_6003_cast_fp16 = slice_by_index(begin = var_6003_begin_0, end = var_6003_end_0, end_mask = var_6003_end_mask_0, squeeze_mask = var_6003_squeeze_mask_0, x = coreml_update_state_55)[name = string("op_6003_cast_fp16")]; tensor V_cache_21_axes_0 = const()[name = string("V_cache_21_axes_0"), val = tensor([0])]; tensor V_cache_21_cast_fp16 = expand_dims(axes = V_cache_21_axes_0, x = var_6003_cast_fp16)[name = string("V_cache_21_cast_fp16")]; tensor k_broadcast_21_reps_0 = const()[name = string("k_broadcast_21_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_21 = tile(reps = k_broadcast_21_reps_0, x = k_65)[name = string("k_broadcast_21")]; tensor v_broadcast_21_reps_0 = const()[name = string("v_broadcast_21_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_21 = transpose(perm = var_5974, x = var_5969)[name = string("transpose_70")]; tensor v_broadcast_21 = tile(reps = v_broadcast_21_reps_0, x = v_21)[name = string("v_broadcast_21")]; tensor var_6011_cast_fp16 = mul(x = K_cache_21_cast_fp16, y = var_1120_cast_fp16)[name = string("op_6011_cast_fp16")]; tensor var_6012_cast_fp16 = mul(x = k_broadcast_21, y = update_mask)[name = string("op_6012_cast_fp16")]; tensor K_new_21_cast_fp16 = add(x = var_6011_cast_fp16, y = var_6012_cast_fp16)[name = string("K_new_21_cast_fp16")]; tensor var_6018_cast_fp16 = mul(x = V_cache_21_cast_fp16, y = var_1120_cast_fp16)[name = string("op_6018_cast_fp16")]; tensor var_6019_cast_fp16 = mul(x = v_broadcast_21, y = update_mask)[name = string("op_6019_cast_fp16")]; tensor V_new_21_cast_fp16 = add(x = var_6018_cast_fp16, y = var_6019_cast_fp16)[name = string("V_new_21_cast_fp16")]; tensor var_6023_axes_0 = const()[name = string("op_6023_axes_0"), val = tensor([0])]; tensor var_6023_cast_fp16 = squeeze(axes = var_6023_axes_0, x = K_new_21_cast_fp16)[name = string("op_6023_cast_fp16")]; tensor concat_40 = const()[name = string("concat_40"), val = tensor([10, 0, 0, 0])]; tensor concat_41 = const()[name = string("concat_41"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_21_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_21_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_21_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_40, begin_mask = kv_cache_0_internal_tensor_assign_21_begin_mask_0, end = concat_41, end_mask = kv_cache_0_internal_tensor_assign_21_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_21_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_21_stride_0, update = var_6023_cast_fp16, x = coreml_update_state_55)[name = string("kv_cache_0_internal_tensor_assign_21_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_21_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_56_write_state")]; tensor coreml_update_state_56 = read_state(input = kv_cache_0)[name = string("coreml_update_state_56")]; tensor var_6030_axes_0 = const()[name = string("op_6030_axes_0"), val = tensor([0])]; tensor var_6030_cast_fp16 = squeeze(axes = var_6030_axes_0, x = V_new_21_cast_fp16)[name = string("op_6030_cast_fp16")]; tensor concat_42 = const()[name = string("concat_42"), val = tensor([28, 0, 0, 0])]; tensor concat_43 = const()[name = string("concat_43"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_22_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_22_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_22_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_42, begin_mask = kv_cache_0_internal_tensor_assign_22_begin_mask_0, end = concat_43, end_mask = kv_cache_0_internal_tensor_assign_22_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_22_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_22_stride_0, update = var_6030_cast_fp16, x = coreml_update_state_56)[name = string("kv_cache_0_internal_tensor_assign_22_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_22_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_57_write_state")]; tensor coreml_update_state_57 = read_state(input = kv_cache_0)[name = string("coreml_update_state_57")]; tensor hidden_states_83_axes_0 = const()[name = string("hidden_states_83_axes_0"), val = tensor([2])]; tensor hidden_states_83_cast_fp16 = expand_dims(axes = hidden_states_83_axes_0, x = K_new_21_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; tensor var_6043 = const()[name = string("op_6043"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_85_cast_fp16 = tile(reps = var_6043, x = hidden_states_83_cast_fp16)[name = string("hidden_states_85_cast_fp16")]; tensor var_6049 = const()[name = string("op_6049"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_21_cast_fp16 = reshape(shape = var_6049, x = hidden_states_85_cast_fp16)[name = string("K_expanded_21_cast_fp16")]; tensor hidden_states_87_axes_0 = const()[name = string("hidden_states_87_axes_0"), val = tensor([2])]; tensor hidden_states_87_cast_fp16 = expand_dims(axes = hidden_states_87_axes_0, x = V_new_21_cast_fp16)[name = string("hidden_states_87_cast_fp16")]; tensor var_6058 = const()[name = string("op_6058"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_89_cast_fp16 = tile(reps = var_6058, x = hidden_states_87_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; tensor var_6064 = const()[name = string("op_6064"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_21_cast_fp16 = reshape(shape = var_6064, x = hidden_states_89_cast_fp16)[name = string("V_expanded_21_cast_fp16")]; bool var_6079_transpose_x_1 = const()[name = string("op_6079_transpose_x_1"), val = bool(false)]; bool var_6079_transpose_y_1 = const()[name = string("op_6079_transpose_y_1"), val = bool(true)]; tensor var_6079_cast_fp16 = matmul(transpose_x = var_6079_transpose_x_1, transpose_y = var_6079_transpose_y_1, x = q_65, y = K_expanded_21_cast_fp16)[name = string("op_6079_cast_fp16")]; fp16 var_6080_to_fp16 = const()[name = string("op_6080_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_61_cast_fp16 = mul(x = var_6079_cast_fp16, y = var_6080_to_fp16)[name = string("attn_weights_61_cast_fp16")]; tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = causal_mask)[name = string("attn_weights_63_cast_fp16")]; int32 var_6089 = const()[name = string("op_6089"), val = int32(-1)]; tensor var_6091_cast_fp16 = softmax(axis = var_6089, x = attn_weights_63_cast_fp16)[name = string("op_6091_cast_fp16")]; bool var_6107_transpose_x_0 = const()[name = string("op_6107_transpose_x_0"), val = bool(false)]; bool var_6107_transpose_y_0 = const()[name = string("op_6107_transpose_y_0"), val = bool(false)]; tensor var_6107_cast_fp16 = matmul(transpose_x = var_6107_transpose_x_0, transpose_y = var_6107_transpose_y_0, x = var_6091_cast_fp16, y = V_expanded_21_cast_fp16)[name = string("op_6107_cast_fp16")]; tensor var_6117 = const()[name = string("op_6117"), val = tensor([0, 2, 1, 3])]; tensor var_6124 = const()[name = string("op_6124"), val = tensor([1, 1, -1])]; tensor var_6118 = transpose(perm = var_6117, x = var_6107_cast_fp16)[name = string("transpose_69")]; tensor attn_output_63 = reshape(shape = var_6124, x = var_6118)[name = string("attn_output_63")]; tensor var_6129 = const()[name = string("op_6129"), val = tensor([0, 2, 1])]; tensor squeeze_10 = const()[name = string("squeeze_10"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534063040)))]; string var_6145_pad_type_0 = const()[name = string("op_6145_pad_type_0"), val = string("valid")]; int32 var_6145_groups_0 = const()[name = string("op_6145_groups_0"), val = int32(1)]; tensor var_6145_strides_0 = const()[name = string("op_6145_strides_0"), val = tensor([1])]; tensor var_6145_pad_0 = const()[name = string("op_6145_pad_0"), val = tensor([0, 0])]; tensor var_6145_dilations_0 = const()[name = string("op_6145_dilations_0"), val = tensor([1])]; tensor var_6130 = transpose(perm = var_6129, x = attn_output_63)[name = string("transpose_68")]; tensor var_6145 = conv(dilations = var_6145_dilations_0, groups = var_6145_groups_0, pad = var_6145_pad_0, pad_type = var_6145_pad_type_0, strides = var_6145_strides_0, weight = squeeze_10, x = var_6130)[name = string("op_6145")]; tensor var_6149 = const()[name = string("op_6149"), val = tensor([0, 2, 1])]; int32 var_6156 = const()[name = string("op_6156"), val = int32(-1)]; fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_169 = transpose(perm = var_6149, x = var_6145)[name = string("transpose_67")]; tensor var_6162_cast_fp16 = mul(x = x_169, y = const_148_promoted_to_fp16)[name = string("op_6162_cast_fp16")]; bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; tensor input_211_cast_fp16 = concat(axis = var_6156, interleave = input_211_interleave_0, values = (x_169, var_6162_cast_fp16))[name = string("input_211_cast_fp16")]; tensor normed_295_axes_0 = const()[name = string("normed_295_axes_0"), val = tensor([-1])]; fp16 var_6154_to_fp16 = const()[name = string("op_6154_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_295_cast_fp16 = layer_norm(axes = normed_295_axes_0, epsilon = var_6154_to_fp16, x = input_211_cast_fp16)[name = string("normed_295_cast_fp16")]; tensor var_6167_split_sizes_0 = const()[name = string("op_6167_split_sizes_0"), val = tensor([640, 640])]; int32 var_6167_axis_0 = const()[name = string("op_6167_axis_0"), val = int32(-1)]; tensor var_6167_cast_fp16_0, tensor var_6167_cast_fp16_1 = split(axis = var_6167_axis_0, split_sizes = var_6167_split_sizes_0, x = normed_295_cast_fp16)[name = string("op_6167_cast_fp16")]; tensor var_6171_to_fp16 = const()[name = string("op_6171_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535373824)))]; tensor out_127_cast_fp16 = mul(x = var_6167_cast_fp16_0, y = var_6171_to_fp16)[name = string("out_127_cast_fp16")]; tensor x_171_cast_fp16 = add(x = x_161_cast_fp16, y = out_127_cast_fp16)[name = string("x_171_cast_fp16")]; int32 var_6185 = const()[name = string("op_6185"), val = int32(-1)]; fp16 const_150_promoted_to_fp16 = const()[name = string("const_150_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6191_cast_fp16 = mul(x = x_171_cast_fp16, y = const_150_promoted_to_fp16)[name = string("op_6191_cast_fp16")]; bool input_213_interleave_0 = const()[name = string("input_213_interleave_0"), val = bool(false)]; tensor input_213_cast_fp16 = concat(axis = var_6185, interleave = input_213_interleave_0, values = (x_171_cast_fp16, var_6191_cast_fp16))[name = string("input_213_cast_fp16")]; tensor normed_299_axes_0 = const()[name = string("normed_299_axes_0"), val = tensor([-1])]; fp16 var_6183_to_fp16 = const()[name = string("op_6183_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_299_cast_fp16 = layer_norm(axes = normed_299_axes_0, epsilon = var_6183_to_fp16, x = input_213_cast_fp16)[name = string("normed_299_cast_fp16")]; tensor var_6196_split_sizes_0 = const()[name = string("op_6196_split_sizes_0"), val = tensor([640, 640])]; int32 var_6196_axis_0 = const()[name = string("op_6196_axis_0"), val = int32(-1)]; tensor var_6196_cast_fp16_0, tensor var_6196_cast_fp16_1 = split(axis = var_6196_axis_0, split_sizes = var_6196_split_sizes_0, x = normed_299_cast_fp16)[name = string("op_6196_cast_fp16")]; tensor var_6200_to_fp16 = const()[name = string("op_6200_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535375168)))]; tensor out_129_cast_fp16 = mul(x = var_6196_cast_fp16_0, y = var_6200_to_fp16)[name = string("out_129_cast_fp16")]; tensor var_6214 = const()[name = string("op_6214"), val = tensor([0, 2, 1])]; tensor input_215_axes_0 = const()[name = string("input_215_axes_0"), val = tensor([2])]; tensor var_6215 = transpose(perm = var_6214, x = out_129_cast_fp16)[name = string("transpose_66")]; tensor input_215 = expand_dims(axes = input_215_axes_0, x = var_6215)[name = string("input_215")]; string gate_41_pad_type_0 = const()[name = string("gate_41_pad_type_0"), val = string("valid")]; tensor gate_41_strides_0 = const()[name = string("gate_41_strides_0"), val = tensor([1, 1])]; tensor gate_41_pad_0 = const()[name = string("gate_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_41_dilations_0 = const()[name = string("gate_41_dilations_0"), val = tensor([1, 1])]; int32 gate_41_groups_0 = const()[name = string("gate_41_groups_0"), val = int32(1)]; tensor gate_41 = conv(dilations = gate_41_dilations_0, groups = gate_41_groups_0, pad = gate_41_pad_0, pad_type = gate_41_pad_type_0, strides = gate_41_strides_0, weight = layers_10_mlp_gate_proj_weight, x = input_215)[name = string("gate_41")]; string up_21_pad_type_0 = const()[name = string("up_21_pad_type_0"), val = string("valid")]; tensor up_21_strides_0 = const()[name = string("up_21_strides_0"), val = tensor([1, 1])]; tensor up_21_pad_0 = const()[name = string("up_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_21_dilations_0 = const()[name = string("up_21_dilations_0"), val = tensor([1, 1])]; int32 up_21_groups_0 = const()[name = string("up_21_groups_0"), val = int32(1)]; tensor up_21 = conv(dilations = up_21_dilations_0, groups = up_21_groups_0, pad = up_21_pad_0, pad_type = up_21_pad_type_0, strides = up_21_strides_0, weight = layers_10_mlp_up_proj_weight, x = input_215)[name = string("up_21")]; string gate_43_mode_0 = const()[name = string("gate_43_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_43 = gelu(mode = gate_43_mode_0, x = gate_41)[name = string("gate_43")]; tensor input_217 = mul(x = gate_43, y = up_21)[name = string("input_217")]; string mlp_out_41_pad_type_0 = const()[name = string("mlp_out_41_pad_type_0"), val = string("valid")]; tensor mlp_out_41_strides_0 = const()[name = string("mlp_out_41_strides_0"), val = tensor([1, 1])]; tensor mlp_out_41_pad_0 = const()[name = string("mlp_out_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_41_dilations_0 = const()[name = string("mlp_out_41_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_41_groups_0 = const()[name = string("mlp_out_41_groups_0"), val = int32(1)]; tensor mlp_out_41 = conv(dilations = mlp_out_41_dilations_0, groups = mlp_out_41_groups_0, pad = mlp_out_41_pad_0, pad_type = mlp_out_41_pad_type_0, strides = mlp_out_41_strides_0, weight = layers_10_mlp_down_proj_weight, x = input_217)[name = string("mlp_out_41")]; tensor var_6255_axes_0 = const()[name = string("op_6255_axes_0"), val = tensor([2])]; tensor var_6255 = squeeze(axes = var_6255_axes_0, x = mlp_out_41)[name = string("op_6255")]; tensor var_6259 = const()[name = string("op_6259"), val = tensor([0, 2, 1])]; int32 var_6266 = const()[name = string("op_6266"), val = int32(-1)]; fp16 const_152_promoted_to_fp16 = const()[name = string("const_152_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_175 = transpose(perm = var_6259, x = var_6255)[name = string("transpose_65")]; tensor var_6272_cast_fp16 = mul(x = x_175, y = const_152_promoted_to_fp16)[name = string("op_6272_cast_fp16")]; bool input_219_interleave_0 = const()[name = string("input_219_interleave_0"), val = bool(false)]; tensor input_219_cast_fp16 = concat(axis = var_6266, interleave = input_219_interleave_0, values = (x_175, var_6272_cast_fp16))[name = string("input_219_cast_fp16")]; tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; fp16 var_6264_to_fp16 = const()[name = string("op_6264_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_6264_to_fp16, x = input_219_cast_fp16)[name = string("normed_305_cast_fp16")]; tensor var_6277_split_sizes_0 = const()[name = string("op_6277_split_sizes_0"), val = tensor([640, 640])]; int32 var_6277_axis_0 = const()[name = string("op_6277_axis_0"), val = int32(-1)]; tensor var_6277_cast_fp16_0, tensor var_6277_cast_fp16_1 = split(axis = var_6277_axis_0, split_sizes = var_6277_split_sizes_0, x = normed_305_cast_fp16)[name = string("op_6277_cast_fp16")]; tensor var_6281_to_fp16 = const()[name = string("op_6281_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535376512)))]; tensor out_131_cast_fp16 = mul(x = var_6277_cast_fp16_0, y = var_6281_to_fp16)[name = string("out_131_cast_fp16")]; tensor x_177_cast_fp16 = add(x = x_171_cast_fp16, y = out_131_cast_fp16)[name = string("x_177_cast_fp16")]; int32 var_6295 = const()[name = string("op_6295"), val = int32(-1)]; fp16 const_154_promoted_to_fp16 = const()[name = string("const_154_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6301_cast_fp16 = mul(x = x_177_cast_fp16, y = const_154_promoted_to_fp16)[name = string("op_6301_cast_fp16")]; bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; tensor input_221_cast_fp16 = concat(axis = var_6295, interleave = input_221_interleave_0, values = (x_177_cast_fp16, var_6301_cast_fp16))[name = string("input_221_cast_fp16")]; tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; fp16 var_6293_to_fp16 = const()[name = string("op_6293_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_6293_to_fp16, x = input_221_cast_fp16)[name = string("normed_309_cast_fp16")]; tensor var_6306_split_sizes_0 = const()[name = string("op_6306_split_sizes_0"), val = tensor([640, 640])]; int32 var_6306_axis_0 = const()[name = string("op_6306_axis_0"), val = int32(-1)]; tensor var_6306_cast_fp16_0, tensor var_6306_cast_fp16_1 = split(axis = var_6306_axis_0, split_sizes = var_6306_split_sizes_0, x = normed_309_cast_fp16)[name = string("op_6306_cast_fp16")]; tensor var_6310_to_fp16 = const()[name = string("op_6310_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535377856)))]; tensor out_133_cast_fp16 = mul(x = var_6306_cast_fp16_0, y = var_6310_to_fp16)[name = string("out_133_cast_fp16")]; tensor var_6324 = const()[name = string("op_6324"), val = tensor([0, 2, 1])]; tensor input_223_axes_0 = const()[name = string("input_223_axes_0"), val = tensor([2])]; tensor var_6325 = transpose(perm = var_6324, x = out_133_cast_fp16)[name = string("transpose_64")]; tensor input_223 = expand_dims(axes = input_223_axes_0, x = var_6325)[name = string("input_223")]; string var_6338_pad_type_0 = const()[name = string("op_6338_pad_type_0"), val = string("valid")]; tensor var_6338_strides_0 = const()[name = string("op_6338_strides_0"), val = tensor([1, 1])]; tensor var_6338_pad_0 = const()[name = string("op_6338_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6338_dilations_0 = const()[name = string("op_6338_dilations_0"), val = tensor([1, 1])]; int32 var_6338_groups_0 = const()[name = string("op_6338_groups_0"), val = int32(1)]; tensor var_6338 = conv(dilations = var_6338_dilations_0, groups = var_6338_groups_0, pad = var_6338_pad_0, pad_type = var_6338_pad_type_0, strides = var_6338_strides_0, weight = layers_11_self_attn_q_proj_weight, x = input_223)[name = string("op_6338")]; tensor var_6343 = const()[name = string("op_6343"), val = tensor([1, 4, 256, 1])]; tensor var_6344 = reshape(shape = var_6343, x = var_6338)[name = string("op_6344")]; tensor var_6349 = const()[name = string("op_6349"), val = tensor([0, 1, 3, 2])]; tensor var_6354 = const()[name = string("op_6354"), val = tensor([1, 4, 256])]; tensor q_67 = transpose(perm = var_6349, x = var_6344)[name = string("transpose_63")]; tensor x_181 = reshape(shape = var_6354, x = q_67)[name = string("x_181")]; int32 var_6361 = const()[name = string("op_6361"), val = int32(-1)]; fp16 const_156_promoted_to_fp16 = const()[name = string("const_156_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6367_cast_fp16 = mul(x = x_181, y = const_156_promoted_to_fp16)[name = string("op_6367_cast_fp16")]; bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)]; tensor input_225_cast_fp16 = concat(axis = var_6361, interleave = input_225_interleave_0, values = (x_181, var_6367_cast_fp16))[name = string("input_225_cast_fp16")]; tensor normed_315_axes_0 = const()[name = string("normed_315_axes_0"), val = tensor([-1])]; fp16 var_6359_to_fp16 = const()[name = string("op_6359_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_315_cast_fp16 = layer_norm(axes = normed_315_axes_0, epsilon = var_6359_to_fp16, x = input_225_cast_fp16)[name = string("normed_315_cast_fp16")]; tensor var_6372_split_sizes_0 = const()[name = string("op_6372_split_sizes_0"), val = tensor([256, 256])]; int32 var_6372_axis_0 = const()[name = string("op_6372_axis_0"), val = int32(-1)]; tensor var_6372_cast_fp16_0, tensor var_6372_cast_fp16_1 = split(axis = var_6372_axis_0, split_sizes = var_6372_split_sizes_0, x = normed_315_cast_fp16)[name = string("op_6372_cast_fp16")]; tensor var_6376_to_fp16 = const()[name = string("op_6376_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535379200)))]; tensor out_135_cast_fp16 = mul(x = var_6372_cast_fp16_0, y = var_6376_to_fp16)[name = string("out_135_cast_fp16")]; tensor var_6383 = const()[name = string("op_6383"), val = tensor([1, 4, 1, 256])]; tensor q_69 = reshape(shape = var_6383, x = out_135_cast_fp16)[name = string("q_69")]; string var_6395_pad_type_0 = const()[name = string("op_6395_pad_type_0"), val = string("valid")]; tensor var_6395_strides_0 = const()[name = string("op_6395_strides_0"), val = tensor([1, 1])]; tensor var_6395_pad_0 = const()[name = string("op_6395_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6395_dilations_0 = const()[name = string("op_6395_dilations_0"), val = tensor([1, 1])]; int32 var_6395_groups_0 = const()[name = string("op_6395_groups_0"), val = int32(1)]; tensor var_6395 = conv(dilations = var_6395_dilations_0, groups = var_6395_groups_0, pad = var_6395_pad_0, pad_type = var_6395_pad_type_0, strides = var_6395_strides_0, weight = layers_11_self_attn_k_proj_weight, x = input_223)[name = string("op_6395")]; tensor var_6400 = const()[name = string("op_6400"), val = tensor([1, 1, 256, 1])]; tensor var_6401 = reshape(shape = var_6400, x = var_6395)[name = string("op_6401")]; tensor var_6406 = const()[name = string("op_6406"), val = tensor([0, 1, 3, 2])]; tensor var_6411 = const()[name = string("op_6411"), val = tensor([1, 1, 256])]; tensor k_67 = transpose(perm = var_6406, x = var_6401)[name = string("transpose_62")]; tensor x_183 = reshape(shape = var_6411, x = k_67)[name = string("x_183")]; int32 var_6418 = const()[name = string("op_6418"), val = int32(-1)]; fp16 const_158_promoted_to_fp16 = const()[name = string("const_158_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6424_cast_fp16 = mul(x = x_183, y = const_158_promoted_to_fp16)[name = string("op_6424_cast_fp16")]; bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; tensor input_227_cast_fp16 = concat(axis = var_6418, interleave = input_227_interleave_0, values = (x_183, var_6424_cast_fp16))[name = string("input_227_cast_fp16")]; tensor normed_319_axes_0 = const()[name = string("normed_319_axes_0"), val = tensor([-1])]; fp16 var_6416_to_fp16 = const()[name = string("op_6416_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_319_cast_fp16 = layer_norm(axes = normed_319_axes_0, epsilon = var_6416_to_fp16, x = input_227_cast_fp16)[name = string("normed_319_cast_fp16")]; tensor var_6429_split_sizes_0 = const()[name = string("op_6429_split_sizes_0"), val = tensor([256, 256])]; int32 var_6429_axis_0 = const()[name = string("op_6429_axis_0"), val = int32(-1)]; tensor var_6429_cast_fp16_0, tensor var_6429_cast_fp16_1 = split(axis = var_6429_axis_0, split_sizes = var_6429_split_sizes_0, x = normed_319_cast_fp16)[name = string("op_6429_cast_fp16")]; tensor var_6433_to_fp16 = const()[name = string("op_6433_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535379776)))]; tensor out_137_cast_fp16 = mul(x = var_6429_cast_fp16_0, y = var_6433_to_fp16)[name = string("out_137_cast_fp16")]; tensor var_6440 = const()[name = string("op_6440"), val = tensor([1, 1, 1, 256])]; tensor k_69 = reshape(shape = var_6440, x = out_137_cast_fp16)[name = string("k_69")]; string var_6452_pad_type_0 = const()[name = string("op_6452_pad_type_0"), val = string("valid")]; tensor var_6452_strides_0 = const()[name = string("op_6452_strides_0"), val = tensor([1, 1])]; tensor var_6452_pad_0 = const()[name = string("op_6452_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6452_dilations_0 = const()[name = string("op_6452_dilations_0"), val = tensor([1, 1])]; int32 var_6452_groups_0 = const()[name = string("op_6452_groups_0"), val = int32(1)]; tensor var_6452 = conv(dilations = var_6452_dilations_0, groups = var_6452_groups_0, pad = var_6452_pad_0, pad_type = var_6452_pad_type_0, strides = var_6452_strides_0, weight = layers_11_self_attn_v_proj_weight, x = input_223)[name = string("op_6452")]; tensor var_6457 = const()[name = string("op_6457"), val = tensor([1, 1, 256, 1])]; tensor var_6458 = reshape(shape = var_6457, x = var_6452)[name = string("op_6458")]; tensor var_6463 = const()[name = string("op_6463"), val = tensor([0, 1, 3, 2])]; tensor var_6465 = mul(x = q_69, y = cos)[name = string("op_6465")]; tensor var_6466_split_sizes_0 = const()[name = string("op_6466_split_sizes_0"), val = tensor([128, 128])]; int32 var_6466_axis_0 = const()[name = string("op_6466_axis_0"), val = int32(-1)]; tensor var_6466_0, tensor var_6466_1 = split(axis = var_6466_axis_0, split_sizes = var_6466_split_sizes_0, x = q_69)[name = string("op_6466")]; fp16 const_160_promoted = const()[name = string("const_160_promoted"), val = fp16(-0x1p+0)]; tensor var_6468 = mul(x = var_6466_1, y = const_160_promoted)[name = string("op_6468")]; int32 var_6470 = const()[name = string("op_6470"), val = int32(-1)]; bool var_6471_interleave_0 = const()[name = string("op_6471_interleave_0"), val = bool(false)]; tensor var_6471 = concat(axis = var_6470, interleave = var_6471_interleave_0, values = (var_6468, var_6466_0))[name = string("op_6471")]; tensor var_6472 = mul(x = var_6471, y = sin)[name = string("op_6472")]; tensor q_71 = add(x = var_6465, y = var_6472)[name = string("q_71")]; tensor var_6475 = mul(x = k_69, y = cos)[name = string("op_6475")]; tensor var_6476_split_sizes_0 = const()[name = string("op_6476_split_sizes_0"), val = tensor([128, 128])]; int32 var_6476_axis_0 = const()[name = string("op_6476_axis_0"), val = int32(-1)]; tensor var_6476_0, tensor var_6476_1 = split(axis = var_6476_axis_0, split_sizes = var_6476_split_sizes_0, x = k_69)[name = string("op_6476")]; fp16 const_161_promoted = const()[name = string("const_161_promoted"), val = fp16(-0x1p+0)]; tensor var_6478 = mul(x = var_6476_1, y = const_161_promoted)[name = string("op_6478")]; int32 var_6480 = const()[name = string("op_6480"), val = int32(-1)]; bool var_6481_interleave_0 = const()[name = string("op_6481_interleave_0"), val = bool(false)]; tensor var_6481 = concat(axis = var_6480, interleave = var_6481_interleave_0, values = (var_6478, var_6476_0))[name = string("op_6481")]; tensor var_6482 = mul(x = var_6481, y = sin)[name = string("op_6482")]; tensor k_71 = add(x = var_6475, y = var_6482)[name = string("k_71")]; tensor var_6487_begin_0 = const()[name = string("op_6487_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_6487_end_0 = const()[name = string("op_6487_end_0"), val = tensor([12, 1, 2048, 256])]; tensor var_6487_end_mask_0 = const()[name = string("op_6487_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6487_squeeze_mask_0 = const()[name = string("op_6487_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_6487_cast_fp16 = slice_by_index(begin = var_6487_begin_0, end = var_6487_end_0, end_mask = var_6487_end_mask_0, squeeze_mask = var_6487_squeeze_mask_0, x = coreml_update_state_57)[name = string("op_6487_cast_fp16")]; tensor K_cache_23_axes_0 = const()[name = string("K_cache_23_axes_0"), val = tensor([0])]; tensor K_cache_23_cast_fp16 = expand_dims(axes = K_cache_23_axes_0, x = var_6487_cast_fp16)[name = string("K_cache_23_cast_fp16")]; tensor var_6492_begin_0 = const()[name = string("op_6492_begin_0"), val = tensor([29, 0, 0, 0])]; tensor var_6492_end_0 = const()[name = string("op_6492_end_0"), val = tensor([30, 1, 2048, 256])]; tensor var_6492_end_mask_0 = const()[name = string("op_6492_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6492_squeeze_mask_0 = const()[name = string("op_6492_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_6492_cast_fp16 = slice_by_index(begin = var_6492_begin_0, end = var_6492_end_0, end_mask = var_6492_end_mask_0, squeeze_mask = var_6492_squeeze_mask_0, x = coreml_update_state_57)[name = string("op_6492_cast_fp16")]; tensor V_cache_23_axes_0 = const()[name = string("V_cache_23_axes_0"), val = tensor([0])]; tensor V_cache_23_cast_fp16 = expand_dims(axes = V_cache_23_axes_0, x = var_6492_cast_fp16)[name = string("V_cache_23_cast_fp16")]; tensor k_broadcast_23_reps_0 = const()[name = string("k_broadcast_23_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_23 = tile(reps = k_broadcast_23_reps_0, x = k_71)[name = string("k_broadcast_23")]; tensor v_broadcast_23_reps_0 = const()[name = string("v_broadcast_23_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_23 = transpose(perm = var_6463, x = var_6458)[name = string("transpose_61")]; tensor v_broadcast_23 = tile(reps = v_broadcast_23_reps_0, x = v_23)[name = string("v_broadcast_23")]; tensor var_6500_cast_fp16 = mul(x = K_cache_23_cast_fp16, y = var_1120_cast_fp16)[name = string("op_6500_cast_fp16")]; tensor var_6501_cast_fp16 = mul(x = k_broadcast_23, y = update_mask)[name = string("op_6501_cast_fp16")]; tensor K_new_23_cast_fp16 = add(x = var_6500_cast_fp16, y = var_6501_cast_fp16)[name = string("K_new_23_cast_fp16")]; tensor var_6507_cast_fp16 = mul(x = V_cache_23_cast_fp16, y = var_1120_cast_fp16)[name = string("op_6507_cast_fp16")]; tensor var_6508_cast_fp16 = mul(x = v_broadcast_23, y = update_mask)[name = string("op_6508_cast_fp16")]; tensor V_new_23_cast_fp16 = add(x = var_6507_cast_fp16, y = var_6508_cast_fp16)[name = string("V_new_23_cast_fp16")]; tensor var_6512_axes_0 = const()[name = string("op_6512_axes_0"), val = tensor([0])]; tensor var_6512_cast_fp16 = squeeze(axes = var_6512_axes_0, x = K_new_23_cast_fp16)[name = string("op_6512_cast_fp16")]; tensor concat_44 = const()[name = string("concat_44"), val = tensor([11, 0, 0, 0])]; tensor concat_45 = const()[name = string("concat_45"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_23_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_23_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_23_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_44, begin_mask = kv_cache_0_internal_tensor_assign_23_begin_mask_0, end = concat_45, end_mask = kv_cache_0_internal_tensor_assign_23_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_23_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_23_stride_0, update = var_6512_cast_fp16, x = coreml_update_state_57)[name = string("kv_cache_0_internal_tensor_assign_23_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_23_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_58_write_state")]; tensor coreml_update_state_58 = read_state(input = kv_cache_0)[name = string("coreml_update_state_58")]; tensor var_6519_axes_0 = const()[name = string("op_6519_axes_0"), val = tensor([0])]; tensor var_6519_cast_fp16 = squeeze(axes = var_6519_axes_0, x = V_new_23_cast_fp16)[name = string("op_6519_cast_fp16")]; tensor concat_46 = const()[name = string("concat_46"), val = tensor([29, 0, 0, 0])]; tensor concat_47 = const()[name = string("concat_47"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_24_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_24_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_24_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_46, begin_mask = kv_cache_0_internal_tensor_assign_24_begin_mask_0, end = concat_47, end_mask = kv_cache_0_internal_tensor_assign_24_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_24_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_24_stride_0, update = var_6519_cast_fp16, x = coreml_update_state_58)[name = string("kv_cache_0_internal_tensor_assign_24_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_24_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_59_write_state")]; tensor coreml_update_state_59 = read_state(input = kv_cache_0)[name = string("coreml_update_state_59")]; tensor hidden_states_91_axes_0 = const()[name = string("hidden_states_91_axes_0"), val = tensor([2])]; tensor hidden_states_91_cast_fp16 = expand_dims(axes = hidden_states_91_axes_0, x = K_new_23_cast_fp16)[name = string("hidden_states_91_cast_fp16")]; tensor var_6532 = const()[name = string("op_6532"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_93_cast_fp16 = tile(reps = var_6532, x = hidden_states_91_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; tensor var_6538 = const()[name = string("op_6538"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_23_cast_fp16 = reshape(shape = var_6538, x = hidden_states_93_cast_fp16)[name = string("K_expanded_23_cast_fp16")]; tensor hidden_states_95_axes_0 = const()[name = string("hidden_states_95_axes_0"), val = tensor([2])]; tensor hidden_states_95_cast_fp16 = expand_dims(axes = hidden_states_95_axes_0, x = V_new_23_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; tensor var_6547 = const()[name = string("op_6547"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_97_cast_fp16 = tile(reps = var_6547, x = hidden_states_95_cast_fp16)[name = string("hidden_states_97_cast_fp16")]; tensor var_6553 = const()[name = string("op_6553"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_23_cast_fp16 = reshape(shape = var_6553, x = hidden_states_97_cast_fp16)[name = string("V_expanded_23_cast_fp16")]; bool var_6568_transpose_x_1 = const()[name = string("op_6568_transpose_x_1"), val = bool(false)]; bool var_6568_transpose_y_1 = const()[name = string("op_6568_transpose_y_1"), val = bool(true)]; tensor var_6568_cast_fp16 = matmul(transpose_x = var_6568_transpose_x_1, transpose_y = var_6568_transpose_y_1, x = q_71, y = K_expanded_23_cast_fp16)[name = string("op_6568_cast_fp16")]; fp16 var_6569_to_fp16 = const()[name = string("op_6569_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_67_cast_fp16 = mul(x = var_6568_cast_fp16, y = var_6569_to_fp16)[name = string("attn_weights_67_cast_fp16")]; tensor attn_weights_69_cast_fp16 = add(x = attn_weights_67_cast_fp16, y = causal_mask)[name = string("attn_weights_69_cast_fp16")]; int32 var_6578 = const()[name = string("op_6578"), val = int32(-1)]; tensor var_6580_cast_fp16 = softmax(axis = var_6578, x = attn_weights_69_cast_fp16)[name = string("op_6580_cast_fp16")]; bool var_6596_transpose_x_0 = const()[name = string("op_6596_transpose_x_0"), val = bool(false)]; bool var_6596_transpose_y_0 = const()[name = string("op_6596_transpose_y_0"), val = bool(false)]; tensor var_6596_cast_fp16 = matmul(transpose_x = var_6596_transpose_x_0, transpose_y = var_6596_transpose_y_0, x = var_6580_cast_fp16, y = V_expanded_23_cast_fp16)[name = string("op_6596_cast_fp16")]; tensor var_6606 = const()[name = string("op_6606"), val = tensor([0, 2, 1, 3])]; tensor var_6613 = const()[name = string("op_6613"), val = tensor([1, 1, -1])]; tensor var_6607 = transpose(perm = var_6606, x = var_6596_cast_fp16)[name = string("transpose_60")]; tensor attn_output_69 = reshape(shape = var_6613, x = var_6607)[name = string("attn_output_69")]; tensor var_6618 = const()[name = string("op_6618"), val = tensor([0, 2, 1])]; tensor squeeze_11 = const()[name = string("squeeze_11"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535380352)))]; string var_6634_pad_type_0 = const()[name = string("op_6634_pad_type_0"), val = string("valid")]; int32 var_6634_groups_0 = const()[name = string("op_6634_groups_0"), val = int32(1)]; tensor var_6634_strides_0 = const()[name = string("op_6634_strides_0"), val = tensor([1])]; tensor var_6634_pad_0 = const()[name = string("op_6634_pad_0"), val = tensor([0, 0])]; tensor var_6634_dilations_0 = const()[name = string("op_6634_dilations_0"), val = tensor([1])]; tensor var_6619 = transpose(perm = var_6618, x = attn_output_69)[name = string("transpose_59")]; tensor var_6634 = conv(dilations = var_6634_dilations_0, groups = var_6634_groups_0, pad = var_6634_pad_0, pad_type = var_6634_pad_type_0, strides = var_6634_strides_0, weight = squeeze_11, x = var_6619)[name = string("op_6634")]; tensor var_6638 = const()[name = string("op_6638"), val = tensor([0, 2, 1])]; int32 var_6645 = const()[name = string("op_6645"), val = int32(-1)]; fp16 const_162_promoted_to_fp16 = const()[name = string("const_162_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_185 = transpose(perm = var_6638, x = var_6634)[name = string("transpose_58")]; tensor var_6651_cast_fp16 = mul(x = x_185, y = const_162_promoted_to_fp16)[name = string("op_6651_cast_fp16")]; bool input_231_interleave_0 = const()[name = string("input_231_interleave_0"), val = bool(false)]; tensor input_231_cast_fp16 = concat(axis = var_6645, interleave = input_231_interleave_0, values = (x_185, var_6651_cast_fp16))[name = string("input_231_cast_fp16")]; tensor normed_323_axes_0 = const()[name = string("normed_323_axes_0"), val = tensor([-1])]; fp16 var_6643_to_fp16 = const()[name = string("op_6643_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_323_cast_fp16 = layer_norm(axes = normed_323_axes_0, epsilon = var_6643_to_fp16, x = input_231_cast_fp16)[name = string("normed_323_cast_fp16")]; tensor var_6656_split_sizes_0 = const()[name = string("op_6656_split_sizes_0"), val = tensor([640, 640])]; int32 var_6656_axis_0 = const()[name = string("op_6656_axis_0"), val = int32(-1)]; tensor var_6656_cast_fp16_0, tensor var_6656_cast_fp16_1 = split(axis = var_6656_axis_0, split_sizes = var_6656_split_sizes_0, x = normed_323_cast_fp16)[name = string("op_6656_cast_fp16")]; tensor var_6660_to_fp16 = const()[name = string("op_6660_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536691136)))]; tensor out_139_cast_fp16 = mul(x = var_6656_cast_fp16_0, y = var_6660_to_fp16)[name = string("out_139_cast_fp16")]; tensor x_187_cast_fp16 = add(x = x_177_cast_fp16, y = out_139_cast_fp16)[name = string("x_187_cast_fp16")]; int32 var_6674 = const()[name = string("op_6674"), val = int32(-1)]; fp16 const_164_promoted_to_fp16 = const()[name = string("const_164_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6680_cast_fp16 = mul(x = x_187_cast_fp16, y = const_164_promoted_to_fp16)[name = string("op_6680_cast_fp16")]; bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)]; tensor input_233_cast_fp16 = concat(axis = var_6674, interleave = input_233_interleave_0, values = (x_187_cast_fp16, var_6680_cast_fp16))[name = string("input_233_cast_fp16")]; tensor normed_327_axes_0 = const()[name = string("normed_327_axes_0"), val = tensor([-1])]; fp16 var_6672_to_fp16 = const()[name = string("op_6672_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_327_cast_fp16 = layer_norm(axes = normed_327_axes_0, epsilon = var_6672_to_fp16, x = input_233_cast_fp16)[name = string("normed_327_cast_fp16")]; tensor var_6685_split_sizes_0 = const()[name = string("op_6685_split_sizes_0"), val = tensor([640, 640])]; int32 var_6685_axis_0 = const()[name = string("op_6685_axis_0"), val = int32(-1)]; tensor var_6685_cast_fp16_0, tensor var_6685_cast_fp16_1 = split(axis = var_6685_axis_0, split_sizes = var_6685_split_sizes_0, x = normed_327_cast_fp16)[name = string("op_6685_cast_fp16")]; tensor var_6689_to_fp16 = const()[name = string("op_6689_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536692480)))]; tensor out_141_cast_fp16 = mul(x = var_6685_cast_fp16_0, y = var_6689_to_fp16)[name = string("out_141_cast_fp16")]; tensor var_6703 = const()[name = string("op_6703"), val = tensor([0, 2, 1])]; tensor input_235_axes_0 = const()[name = string("input_235_axes_0"), val = tensor([2])]; tensor var_6704 = transpose(perm = var_6703, x = out_141_cast_fp16)[name = string("transpose_57")]; tensor input_235 = expand_dims(axes = input_235_axes_0, x = var_6704)[name = string("input_235")]; string gate_45_pad_type_0 = const()[name = string("gate_45_pad_type_0"), val = string("valid")]; tensor gate_45_strides_0 = const()[name = string("gate_45_strides_0"), val = tensor([1, 1])]; tensor gate_45_pad_0 = const()[name = string("gate_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_45_dilations_0 = const()[name = string("gate_45_dilations_0"), val = tensor([1, 1])]; int32 gate_45_groups_0 = const()[name = string("gate_45_groups_0"), val = int32(1)]; tensor gate_45 = conv(dilations = gate_45_dilations_0, groups = gate_45_groups_0, pad = gate_45_pad_0, pad_type = gate_45_pad_type_0, strides = gate_45_strides_0, weight = layers_11_mlp_gate_proj_weight, x = input_235)[name = string("gate_45")]; string up_23_pad_type_0 = const()[name = string("up_23_pad_type_0"), val = string("valid")]; tensor up_23_strides_0 = const()[name = string("up_23_strides_0"), val = tensor([1, 1])]; tensor up_23_pad_0 = const()[name = string("up_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_23_dilations_0 = const()[name = string("up_23_dilations_0"), val = tensor([1, 1])]; int32 up_23_groups_0 = const()[name = string("up_23_groups_0"), val = int32(1)]; tensor up_23 = conv(dilations = up_23_dilations_0, groups = up_23_groups_0, pad = up_23_pad_0, pad_type = up_23_pad_type_0, strides = up_23_strides_0, weight = layers_11_mlp_up_proj_weight, x = input_235)[name = string("up_23")]; string gate_47_mode_0 = const()[name = string("gate_47_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_47 = gelu(mode = gate_47_mode_0, x = gate_45)[name = string("gate_47")]; tensor input_237 = mul(x = gate_47, y = up_23)[name = string("input_237")]; string mlp_out_45_pad_type_0 = const()[name = string("mlp_out_45_pad_type_0"), val = string("valid")]; tensor mlp_out_45_strides_0 = const()[name = string("mlp_out_45_strides_0"), val = tensor([1, 1])]; tensor mlp_out_45_pad_0 = const()[name = string("mlp_out_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_45_dilations_0 = const()[name = string("mlp_out_45_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_45_groups_0 = const()[name = string("mlp_out_45_groups_0"), val = int32(1)]; tensor mlp_out_45 = conv(dilations = mlp_out_45_dilations_0, groups = mlp_out_45_groups_0, pad = mlp_out_45_pad_0, pad_type = mlp_out_45_pad_type_0, strides = mlp_out_45_strides_0, weight = layers_11_mlp_down_proj_weight, x = input_237)[name = string("mlp_out_45")]; tensor var_6744_axes_0 = const()[name = string("op_6744_axes_0"), val = tensor([2])]; tensor var_6744 = squeeze(axes = var_6744_axes_0, x = mlp_out_45)[name = string("op_6744")]; tensor var_6748 = const()[name = string("op_6748"), val = tensor([0, 2, 1])]; int32 var_6755 = const()[name = string("op_6755"), val = int32(-1)]; fp16 const_166_promoted_to_fp16 = const()[name = string("const_166_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_191 = transpose(perm = var_6748, x = var_6744)[name = string("transpose_56")]; tensor var_6761_cast_fp16 = mul(x = x_191, y = const_166_promoted_to_fp16)[name = string("op_6761_cast_fp16")]; bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)]; tensor input_239_cast_fp16 = concat(axis = var_6755, interleave = input_239_interleave_0, values = (x_191, var_6761_cast_fp16))[name = string("input_239_cast_fp16")]; tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; fp16 var_6753_to_fp16 = const()[name = string("op_6753_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_6753_to_fp16, x = input_239_cast_fp16)[name = string("normed_333_cast_fp16")]; tensor var_6766_split_sizes_0 = const()[name = string("op_6766_split_sizes_0"), val = tensor([640, 640])]; int32 var_6766_axis_0 = const()[name = string("op_6766_axis_0"), val = int32(-1)]; tensor var_6766_cast_fp16_0, tensor var_6766_cast_fp16_1 = split(axis = var_6766_axis_0, split_sizes = var_6766_split_sizes_0, x = normed_333_cast_fp16)[name = string("op_6766_cast_fp16")]; tensor var_6770_to_fp16 = const()[name = string("op_6770_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536693824)))]; tensor out_143_cast_fp16 = mul(x = var_6766_cast_fp16_0, y = var_6770_to_fp16)[name = string("out_143_cast_fp16")]; tensor x_193_cast_fp16 = add(x = x_187_cast_fp16, y = out_143_cast_fp16)[name = string("x_193_cast_fp16")]; int32 var_6784 = const()[name = string("op_6784"), val = int32(-1)]; fp16 const_168_promoted_to_fp16 = const()[name = string("const_168_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6790_cast_fp16 = mul(x = x_193_cast_fp16, y = const_168_promoted_to_fp16)[name = string("op_6790_cast_fp16")]; bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; tensor input_241_cast_fp16 = concat(axis = var_6784, interleave = input_241_interleave_0, values = (x_193_cast_fp16, var_6790_cast_fp16))[name = string("input_241_cast_fp16")]; tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; fp16 var_6782_to_fp16 = const()[name = string("op_6782_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_6782_to_fp16, x = input_241_cast_fp16)[name = string("normed_337_cast_fp16")]; tensor var_6795_split_sizes_0 = const()[name = string("op_6795_split_sizes_0"), val = tensor([640, 640])]; int32 var_6795_axis_0 = const()[name = string("op_6795_axis_0"), val = int32(-1)]; tensor var_6795_cast_fp16_0, tensor var_6795_cast_fp16_1 = split(axis = var_6795_axis_0, split_sizes = var_6795_split_sizes_0, x = normed_337_cast_fp16)[name = string("op_6795_cast_fp16")]; tensor var_6799_to_fp16 = const()[name = string("op_6799_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536695168)))]; tensor out_145_cast_fp16 = mul(x = var_6795_cast_fp16_0, y = var_6799_to_fp16)[name = string("out_145_cast_fp16")]; tensor var_6813 = const()[name = string("op_6813"), val = tensor([0, 2, 1])]; tensor input_243_axes_0 = const()[name = string("input_243_axes_0"), val = tensor([2])]; tensor var_6814 = transpose(perm = var_6813, x = out_145_cast_fp16)[name = string("transpose_55")]; tensor input_243 = expand_dims(axes = input_243_axes_0, x = var_6814)[name = string("input_243")]; string var_6827_pad_type_0 = const()[name = string("op_6827_pad_type_0"), val = string("valid")]; tensor var_6827_strides_0 = const()[name = string("op_6827_strides_0"), val = tensor([1, 1])]; tensor var_6827_pad_0 = const()[name = string("op_6827_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6827_dilations_0 = const()[name = string("op_6827_dilations_0"), val = tensor([1, 1])]; int32 var_6827_groups_0 = const()[name = string("op_6827_groups_0"), val = int32(1)]; tensor var_6827 = conv(dilations = var_6827_dilations_0, groups = var_6827_groups_0, pad = var_6827_pad_0, pad_type = var_6827_pad_type_0, strides = var_6827_strides_0, weight = layers_12_self_attn_q_proj_weight, x = input_243)[name = string("op_6827")]; tensor var_6832 = const()[name = string("op_6832"), val = tensor([1, 4, 256, 1])]; tensor var_6833 = reshape(shape = var_6832, x = var_6827)[name = string("op_6833")]; tensor var_6838 = const()[name = string("op_6838"), val = tensor([0, 1, 3, 2])]; tensor var_6843 = const()[name = string("op_6843"), val = tensor([1, 4, 256])]; tensor q_73 = transpose(perm = var_6838, x = var_6833)[name = string("transpose_54")]; tensor x_197 = reshape(shape = var_6843, x = q_73)[name = string("x_197")]; int32 var_6850 = const()[name = string("op_6850"), val = int32(-1)]; fp16 const_170_promoted_to_fp16 = const()[name = string("const_170_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6856_cast_fp16 = mul(x = x_197, y = const_170_promoted_to_fp16)[name = string("op_6856_cast_fp16")]; bool input_245_interleave_0 = const()[name = string("input_245_interleave_0"), val = bool(false)]; tensor input_245_cast_fp16 = concat(axis = var_6850, interleave = input_245_interleave_0, values = (x_197, var_6856_cast_fp16))[name = string("input_245_cast_fp16")]; tensor normed_343_axes_0 = const()[name = string("normed_343_axes_0"), val = tensor([-1])]; fp16 var_6848_to_fp16 = const()[name = string("op_6848_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_343_cast_fp16 = layer_norm(axes = normed_343_axes_0, epsilon = var_6848_to_fp16, x = input_245_cast_fp16)[name = string("normed_343_cast_fp16")]; tensor var_6861_split_sizes_0 = const()[name = string("op_6861_split_sizes_0"), val = tensor([256, 256])]; int32 var_6861_axis_0 = const()[name = string("op_6861_axis_0"), val = int32(-1)]; tensor var_6861_cast_fp16_0, tensor var_6861_cast_fp16_1 = split(axis = var_6861_axis_0, split_sizes = var_6861_split_sizes_0, x = normed_343_cast_fp16)[name = string("op_6861_cast_fp16")]; tensor var_6865_to_fp16 = const()[name = string("op_6865_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536696512)))]; tensor out_147_cast_fp16 = mul(x = var_6861_cast_fp16_0, y = var_6865_to_fp16)[name = string("out_147_cast_fp16")]; tensor var_6872 = const()[name = string("op_6872"), val = tensor([1, 4, 1, 256])]; tensor q_75 = reshape(shape = var_6872, x = out_147_cast_fp16)[name = string("q_75")]; string var_6884_pad_type_0 = const()[name = string("op_6884_pad_type_0"), val = string("valid")]; tensor var_6884_strides_0 = const()[name = string("op_6884_strides_0"), val = tensor([1, 1])]; tensor var_6884_pad_0 = const()[name = string("op_6884_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6884_dilations_0 = const()[name = string("op_6884_dilations_0"), val = tensor([1, 1])]; int32 var_6884_groups_0 = const()[name = string("op_6884_groups_0"), val = int32(1)]; tensor var_6884 = conv(dilations = var_6884_dilations_0, groups = var_6884_groups_0, pad = var_6884_pad_0, pad_type = var_6884_pad_type_0, strides = var_6884_strides_0, weight = layers_12_self_attn_k_proj_weight, x = input_243)[name = string("op_6884")]; tensor var_6889 = const()[name = string("op_6889"), val = tensor([1, 1, 256, 1])]; tensor var_6890 = reshape(shape = var_6889, x = var_6884)[name = string("op_6890")]; tensor var_6895 = const()[name = string("op_6895"), val = tensor([0, 1, 3, 2])]; tensor var_6900 = const()[name = string("op_6900"), val = tensor([1, 1, 256])]; tensor k_73 = transpose(perm = var_6895, x = var_6890)[name = string("transpose_53")]; tensor x_199 = reshape(shape = var_6900, x = k_73)[name = string("x_199")]; int32 var_6907 = const()[name = string("op_6907"), val = int32(-1)]; fp16 const_172_promoted_to_fp16 = const()[name = string("const_172_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6913_cast_fp16 = mul(x = x_199, y = const_172_promoted_to_fp16)[name = string("op_6913_cast_fp16")]; bool input_247_interleave_0 = const()[name = string("input_247_interleave_0"), val = bool(false)]; tensor input_247_cast_fp16 = concat(axis = var_6907, interleave = input_247_interleave_0, values = (x_199, var_6913_cast_fp16))[name = string("input_247_cast_fp16")]; tensor normed_347_axes_0 = const()[name = string("normed_347_axes_0"), val = tensor([-1])]; fp16 var_6905_to_fp16 = const()[name = string("op_6905_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_347_cast_fp16 = layer_norm(axes = normed_347_axes_0, epsilon = var_6905_to_fp16, x = input_247_cast_fp16)[name = string("normed_347_cast_fp16")]; tensor var_6918_split_sizes_0 = const()[name = string("op_6918_split_sizes_0"), val = tensor([256, 256])]; int32 var_6918_axis_0 = const()[name = string("op_6918_axis_0"), val = int32(-1)]; tensor var_6918_cast_fp16_0, tensor var_6918_cast_fp16_1 = split(axis = var_6918_axis_0, split_sizes = var_6918_split_sizes_0, x = normed_347_cast_fp16)[name = string("op_6918_cast_fp16")]; tensor var_6922_to_fp16 = const()[name = string("op_6922_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536697088)))]; tensor out_149_cast_fp16 = mul(x = var_6918_cast_fp16_0, y = var_6922_to_fp16)[name = string("out_149_cast_fp16")]; tensor var_6929 = const()[name = string("op_6929"), val = tensor([1, 1, 1, 256])]; tensor k_75 = reshape(shape = var_6929, x = out_149_cast_fp16)[name = string("k_75")]; string var_6941_pad_type_0 = const()[name = string("op_6941_pad_type_0"), val = string("valid")]; tensor var_6941_strides_0 = const()[name = string("op_6941_strides_0"), val = tensor([1, 1])]; tensor var_6941_pad_0 = const()[name = string("op_6941_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6941_dilations_0 = const()[name = string("op_6941_dilations_0"), val = tensor([1, 1])]; int32 var_6941_groups_0 = const()[name = string("op_6941_groups_0"), val = int32(1)]; tensor var_6941 = conv(dilations = var_6941_dilations_0, groups = var_6941_groups_0, pad = var_6941_pad_0, pad_type = var_6941_pad_type_0, strides = var_6941_strides_0, weight = layers_12_self_attn_v_proj_weight, x = input_243)[name = string("op_6941")]; tensor var_6946 = const()[name = string("op_6946"), val = tensor([1, 1, 256, 1])]; tensor var_6947 = reshape(shape = var_6946, x = var_6941)[name = string("op_6947")]; tensor var_6952 = const()[name = string("op_6952"), val = tensor([0, 1, 3, 2])]; tensor var_6954 = mul(x = q_75, y = cos_1)[name = string("op_6954")]; tensor var_6955_split_sizes_0 = const()[name = string("op_6955_split_sizes_0"), val = tensor([128, 128])]; int32 var_6955_axis_0 = const()[name = string("op_6955_axis_0"), val = int32(-1)]; tensor var_6955_0, tensor var_6955_1 = split(axis = var_6955_axis_0, split_sizes = var_6955_split_sizes_0, x = q_75)[name = string("op_6955")]; fp16 const_174_promoted = const()[name = string("const_174_promoted"), val = fp16(-0x1p+0)]; tensor var_6957 = mul(x = var_6955_1, y = const_174_promoted)[name = string("op_6957")]; int32 var_6959 = const()[name = string("op_6959"), val = int32(-1)]; bool var_6960_interleave_0 = const()[name = string("op_6960_interleave_0"), val = bool(false)]; tensor var_6960 = concat(axis = var_6959, interleave = var_6960_interleave_0, values = (var_6957, var_6955_0))[name = string("op_6960")]; tensor var_6961 = mul(x = var_6960, y = sin_1)[name = string("op_6961")]; tensor q_77 = add(x = var_6954, y = var_6961)[name = string("q_77")]; tensor var_6964 = mul(x = k_75, y = cos_1)[name = string("op_6964")]; tensor var_6965_split_sizes_0 = const()[name = string("op_6965_split_sizes_0"), val = tensor([128, 128])]; int32 var_6965_axis_0 = const()[name = string("op_6965_axis_0"), val = int32(-1)]; tensor var_6965_0, tensor var_6965_1 = split(axis = var_6965_axis_0, split_sizes = var_6965_split_sizes_0, x = k_75)[name = string("op_6965")]; fp16 const_175_promoted = const()[name = string("const_175_promoted"), val = fp16(-0x1p+0)]; tensor var_6967 = mul(x = var_6965_1, y = const_175_promoted)[name = string("op_6967")]; int32 var_6969 = const()[name = string("op_6969"), val = int32(-1)]; bool var_6970_interleave_0 = const()[name = string("op_6970_interleave_0"), val = bool(false)]; tensor var_6970 = concat(axis = var_6969, interleave = var_6970_interleave_0, values = (var_6967, var_6965_0))[name = string("op_6970")]; tensor var_6971 = mul(x = var_6970, y = sin_1)[name = string("op_6971")]; tensor k_77 = add(x = var_6964, y = var_6971)[name = string("k_77")]; tensor var_6976_begin_0 = const()[name = string("op_6976_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_6976_end_0 = const()[name = string("op_6976_end_0"), val = tensor([13, 1, 2048, 256])]; tensor var_6976_end_mask_0 = const()[name = string("op_6976_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6976_squeeze_mask_0 = const()[name = string("op_6976_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_6976_cast_fp16 = slice_by_index(begin = var_6976_begin_0, end = var_6976_end_0, end_mask = var_6976_end_mask_0, squeeze_mask = var_6976_squeeze_mask_0, x = coreml_update_state_59)[name = string("op_6976_cast_fp16")]; tensor K_cache_25_axes_0 = const()[name = string("K_cache_25_axes_0"), val = tensor([0])]; tensor K_cache_25_cast_fp16 = expand_dims(axes = K_cache_25_axes_0, x = var_6976_cast_fp16)[name = string("K_cache_25_cast_fp16")]; tensor var_6981_begin_0 = const()[name = string("op_6981_begin_0"), val = tensor([30, 0, 0, 0])]; tensor var_6981_end_0 = const()[name = string("op_6981_end_0"), val = tensor([31, 1, 2048, 256])]; tensor var_6981_end_mask_0 = const()[name = string("op_6981_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6981_squeeze_mask_0 = const()[name = string("op_6981_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_6981_cast_fp16 = slice_by_index(begin = var_6981_begin_0, end = var_6981_end_0, end_mask = var_6981_end_mask_0, squeeze_mask = var_6981_squeeze_mask_0, x = coreml_update_state_59)[name = string("op_6981_cast_fp16")]; tensor V_cache_25_axes_0 = const()[name = string("V_cache_25_axes_0"), val = tensor([0])]; tensor V_cache_25_cast_fp16 = expand_dims(axes = V_cache_25_axes_0, x = var_6981_cast_fp16)[name = string("V_cache_25_cast_fp16")]; tensor k_broadcast_25_reps_0 = const()[name = string("k_broadcast_25_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_25 = tile(reps = k_broadcast_25_reps_0, x = k_77)[name = string("k_broadcast_25")]; tensor v_broadcast_25_reps_0 = const()[name = string("v_broadcast_25_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_25 = transpose(perm = var_6952, x = var_6947)[name = string("transpose_52")]; tensor v_broadcast_25 = tile(reps = v_broadcast_25_reps_0, x = v_25)[name = string("v_broadcast_25")]; tensor var_6989_cast_fp16 = mul(x = K_cache_25_cast_fp16, y = var_1120_cast_fp16)[name = string("op_6989_cast_fp16")]; tensor var_6990_cast_fp16 = mul(x = k_broadcast_25, y = update_mask)[name = string("op_6990_cast_fp16")]; tensor K_new_25_cast_fp16 = add(x = var_6989_cast_fp16, y = var_6990_cast_fp16)[name = string("K_new_25_cast_fp16")]; tensor var_6996_cast_fp16 = mul(x = V_cache_25_cast_fp16, y = var_1120_cast_fp16)[name = string("op_6996_cast_fp16")]; tensor var_6997_cast_fp16 = mul(x = v_broadcast_25, y = update_mask)[name = string("op_6997_cast_fp16")]; tensor V_new_25_cast_fp16 = add(x = var_6996_cast_fp16, y = var_6997_cast_fp16)[name = string("V_new_25_cast_fp16")]; tensor var_7001_axes_0 = const()[name = string("op_7001_axes_0"), val = tensor([0])]; tensor var_7001_cast_fp16 = squeeze(axes = var_7001_axes_0, x = K_new_25_cast_fp16)[name = string("op_7001_cast_fp16")]; tensor concat_48 = const()[name = string("concat_48"), val = tensor([12, 0, 0, 0])]; tensor concat_49 = const()[name = string("concat_49"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_25_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_25_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_25_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_48, begin_mask = kv_cache_0_internal_tensor_assign_25_begin_mask_0, end = concat_49, end_mask = kv_cache_0_internal_tensor_assign_25_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_25_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_25_stride_0, update = var_7001_cast_fp16, x = coreml_update_state_59)[name = string("kv_cache_0_internal_tensor_assign_25_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_25_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_60_write_state")]; tensor coreml_update_state_60 = read_state(input = kv_cache_0)[name = string("coreml_update_state_60")]; tensor var_7008_axes_0 = const()[name = string("op_7008_axes_0"), val = tensor([0])]; tensor var_7008_cast_fp16 = squeeze(axes = var_7008_axes_0, x = V_new_25_cast_fp16)[name = string("op_7008_cast_fp16")]; tensor concat_50 = const()[name = string("concat_50"), val = tensor([30, 0, 0, 0])]; tensor concat_51 = const()[name = string("concat_51"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_26_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_26_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_26_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_50, begin_mask = kv_cache_0_internal_tensor_assign_26_begin_mask_0, end = concat_51, end_mask = kv_cache_0_internal_tensor_assign_26_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_26_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_26_stride_0, update = var_7008_cast_fp16, x = coreml_update_state_60)[name = string("kv_cache_0_internal_tensor_assign_26_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_26_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_61_write_state")]; tensor coreml_update_state_61 = read_state(input = kv_cache_0)[name = string("coreml_update_state_61")]; tensor hidden_states_99_axes_0 = const()[name = string("hidden_states_99_axes_0"), val = tensor([2])]; tensor hidden_states_99_cast_fp16 = expand_dims(axes = hidden_states_99_axes_0, x = K_new_25_cast_fp16)[name = string("hidden_states_99_cast_fp16")]; tensor var_7021 = const()[name = string("op_7021"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_101_cast_fp16 = tile(reps = var_7021, x = hidden_states_99_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor var_7027 = const()[name = string("op_7027"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_25_cast_fp16 = reshape(shape = var_7027, x = hidden_states_101_cast_fp16)[name = string("K_expanded_25_cast_fp16")]; tensor hidden_states_103_axes_0 = const()[name = string("hidden_states_103_axes_0"), val = tensor([2])]; tensor hidden_states_103_cast_fp16 = expand_dims(axes = hidden_states_103_axes_0, x = V_new_25_cast_fp16)[name = string("hidden_states_103_cast_fp16")]; tensor var_7036 = const()[name = string("op_7036"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_105_cast_fp16 = tile(reps = var_7036, x = hidden_states_103_cast_fp16)[name = string("hidden_states_105_cast_fp16")]; tensor var_7042 = const()[name = string("op_7042"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_25_cast_fp16 = reshape(shape = var_7042, x = hidden_states_105_cast_fp16)[name = string("V_expanded_25_cast_fp16")]; bool var_7057_transpose_x_1 = const()[name = string("op_7057_transpose_x_1"), val = bool(false)]; bool var_7057_transpose_y_1 = const()[name = string("op_7057_transpose_y_1"), val = bool(true)]; tensor var_7057_cast_fp16 = matmul(transpose_x = var_7057_transpose_x_1, transpose_y = var_7057_transpose_y_1, x = q_77, y = K_expanded_25_cast_fp16)[name = string("op_7057_cast_fp16")]; fp16 var_7058_to_fp16 = const()[name = string("op_7058_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_73_cast_fp16 = mul(x = var_7057_cast_fp16, y = var_7058_to_fp16)[name = string("attn_weights_73_cast_fp16")]; tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = causal_mask)[name = string("attn_weights_75_cast_fp16")]; int32 var_7067 = const()[name = string("op_7067"), val = int32(-1)]; tensor var_7069_cast_fp16 = softmax(axis = var_7067, x = attn_weights_75_cast_fp16)[name = string("op_7069_cast_fp16")]; bool var_7085_transpose_x_0 = const()[name = string("op_7085_transpose_x_0"), val = bool(false)]; bool var_7085_transpose_y_0 = const()[name = string("op_7085_transpose_y_0"), val = bool(false)]; tensor var_7085_cast_fp16 = matmul(transpose_x = var_7085_transpose_x_0, transpose_y = var_7085_transpose_y_0, x = var_7069_cast_fp16, y = V_expanded_25_cast_fp16)[name = string("op_7085_cast_fp16")]; tensor var_7095 = const()[name = string("op_7095"), val = tensor([0, 2, 1, 3])]; tensor var_7102 = const()[name = string("op_7102"), val = tensor([1, 1, -1])]; tensor var_7096 = transpose(perm = var_7095, x = var_7085_cast_fp16)[name = string("transpose_51")]; tensor attn_output_75 = reshape(shape = var_7102, x = var_7096)[name = string("attn_output_75")]; tensor var_7107 = const()[name = string("op_7107"), val = tensor([0, 2, 1])]; tensor squeeze_12 = const()[name = string("squeeze_12"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536697664)))]; string var_7123_pad_type_0 = const()[name = string("op_7123_pad_type_0"), val = string("valid")]; int32 var_7123_groups_0 = const()[name = string("op_7123_groups_0"), val = int32(1)]; tensor var_7123_strides_0 = const()[name = string("op_7123_strides_0"), val = tensor([1])]; tensor var_7123_pad_0 = const()[name = string("op_7123_pad_0"), val = tensor([0, 0])]; tensor var_7123_dilations_0 = const()[name = string("op_7123_dilations_0"), val = tensor([1])]; tensor var_7108 = transpose(perm = var_7107, x = attn_output_75)[name = string("transpose_50")]; tensor var_7123 = conv(dilations = var_7123_dilations_0, groups = var_7123_groups_0, pad = var_7123_pad_0, pad_type = var_7123_pad_type_0, strides = var_7123_strides_0, weight = squeeze_12, x = var_7108)[name = string("op_7123")]; tensor var_7127 = const()[name = string("op_7127"), val = tensor([0, 2, 1])]; int32 var_7134 = const()[name = string("op_7134"), val = int32(-1)]; fp16 const_176_promoted_to_fp16 = const()[name = string("const_176_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_201 = transpose(perm = var_7127, x = var_7123)[name = string("transpose_49")]; tensor var_7140_cast_fp16 = mul(x = x_201, y = const_176_promoted_to_fp16)[name = string("op_7140_cast_fp16")]; bool input_251_interleave_0 = const()[name = string("input_251_interleave_0"), val = bool(false)]; tensor input_251_cast_fp16 = concat(axis = var_7134, interleave = input_251_interleave_0, values = (x_201, var_7140_cast_fp16))[name = string("input_251_cast_fp16")]; tensor normed_351_axes_0 = const()[name = string("normed_351_axes_0"), val = tensor([-1])]; fp16 var_7132_to_fp16 = const()[name = string("op_7132_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_351_cast_fp16 = layer_norm(axes = normed_351_axes_0, epsilon = var_7132_to_fp16, x = input_251_cast_fp16)[name = string("normed_351_cast_fp16")]; tensor var_7145_split_sizes_0 = const()[name = string("op_7145_split_sizes_0"), val = tensor([640, 640])]; int32 var_7145_axis_0 = const()[name = string("op_7145_axis_0"), val = int32(-1)]; tensor var_7145_cast_fp16_0, tensor var_7145_cast_fp16_1 = split(axis = var_7145_axis_0, split_sizes = var_7145_split_sizes_0, x = normed_351_cast_fp16)[name = string("op_7145_cast_fp16")]; tensor var_7149_to_fp16 = const()[name = string("op_7149_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538008448)))]; tensor out_151_cast_fp16 = mul(x = var_7145_cast_fp16_0, y = var_7149_to_fp16)[name = string("out_151_cast_fp16")]; tensor x_203_cast_fp16 = add(x = x_193_cast_fp16, y = out_151_cast_fp16)[name = string("x_203_cast_fp16")]; int32 var_7163 = const()[name = string("op_7163"), val = int32(-1)]; fp16 const_178_promoted_to_fp16 = const()[name = string("const_178_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7169_cast_fp16 = mul(x = x_203_cast_fp16, y = const_178_promoted_to_fp16)[name = string("op_7169_cast_fp16")]; bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; tensor input_253_cast_fp16 = concat(axis = var_7163, interleave = input_253_interleave_0, values = (x_203_cast_fp16, var_7169_cast_fp16))[name = string("input_253_cast_fp16")]; tensor normed_355_axes_0 = const()[name = string("normed_355_axes_0"), val = tensor([-1])]; fp16 var_7161_to_fp16 = const()[name = string("op_7161_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_355_cast_fp16 = layer_norm(axes = normed_355_axes_0, epsilon = var_7161_to_fp16, x = input_253_cast_fp16)[name = string("normed_355_cast_fp16")]; tensor var_7174_split_sizes_0 = const()[name = string("op_7174_split_sizes_0"), val = tensor([640, 640])]; int32 var_7174_axis_0 = const()[name = string("op_7174_axis_0"), val = int32(-1)]; tensor var_7174_cast_fp16_0, tensor var_7174_cast_fp16_1 = split(axis = var_7174_axis_0, split_sizes = var_7174_split_sizes_0, x = normed_355_cast_fp16)[name = string("op_7174_cast_fp16")]; tensor var_7178_to_fp16 = const()[name = string("op_7178_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538009792)))]; tensor out_153_cast_fp16 = mul(x = var_7174_cast_fp16_0, y = var_7178_to_fp16)[name = string("out_153_cast_fp16")]; tensor var_7192 = const()[name = string("op_7192"), val = tensor([0, 2, 1])]; tensor input_255_axes_0 = const()[name = string("input_255_axes_0"), val = tensor([2])]; tensor var_7193 = transpose(perm = var_7192, x = out_153_cast_fp16)[name = string("transpose_48")]; tensor input_255 = expand_dims(axes = input_255_axes_0, x = var_7193)[name = string("input_255")]; string gate_49_pad_type_0 = const()[name = string("gate_49_pad_type_0"), val = string("valid")]; tensor gate_49_strides_0 = const()[name = string("gate_49_strides_0"), val = tensor([1, 1])]; tensor gate_49_pad_0 = const()[name = string("gate_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_49_dilations_0 = const()[name = string("gate_49_dilations_0"), val = tensor([1, 1])]; int32 gate_49_groups_0 = const()[name = string("gate_49_groups_0"), val = int32(1)]; tensor gate_49 = conv(dilations = gate_49_dilations_0, groups = gate_49_groups_0, pad = gate_49_pad_0, pad_type = gate_49_pad_type_0, strides = gate_49_strides_0, weight = layers_12_mlp_gate_proj_weight, x = input_255)[name = string("gate_49")]; string up_25_pad_type_0 = const()[name = string("up_25_pad_type_0"), val = string("valid")]; tensor up_25_strides_0 = const()[name = string("up_25_strides_0"), val = tensor([1, 1])]; tensor up_25_pad_0 = const()[name = string("up_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_25_dilations_0 = const()[name = string("up_25_dilations_0"), val = tensor([1, 1])]; int32 up_25_groups_0 = const()[name = string("up_25_groups_0"), val = int32(1)]; tensor up_25 = conv(dilations = up_25_dilations_0, groups = up_25_groups_0, pad = up_25_pad_0, pad_type = up_25_pad_type_0, strides = up_25_strides_0, weight = layers_12_mlp_up_proj_weight, x = input_255)[name = string("up_25")]; string gate_51_mode_0 = const()[name = string("gate_51_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_51 = gelu(mode = gate_51_mode_0, x = gate_49)[name = string("gate_51")]; tensor input_257 = mul(x = gate_51, y = up_25)[name = string("input_257")]; string mlp_out_49_pad_type_0 = const()[name = string("mlp_out_49_pad_type_0"), val = string("valid")]; tensor mlp_out_49_strides_0 = const()[name = string("mlp_out_49_strides_0"), val = tensor([1, 1])]; tensor mlp_out_49_pad_0 = const()[name = string("mlp_out_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_49_dilations_0 = const()[name = string("mlp_out_49_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_49_groups_0 = const()[name = string("mlp_out_49_groups_0"), val = int32(1)]; tensor mlp_out_49 = conv(dilations = mlp_out_49_dilations_0, groups = mlp_out_49_groups_0, pad = mlp_out_49_pad_0, pad_type = mlp_out_49_pad_type_0, strides = mlp_out_49_strides_0, weight = layers_12_mlp_down_proj_weight, x = input_257)[name = string("mlp_out_49")]; tensor var_7233_axes_0 = const()[name = string("op_7233_axes_0"), val = tensor([2])]; tensor var_7233 = squeeze(axes = var_7233_axes_0, x = mlp_out_49)[name = string("op_7233")]; tensor var_7237 = const()[name = string("op_7237"), val = tensor([0, 2, 1])]; int32 var_7244 = const()[name = string("op_7244"), val = int32(-1)]; fp16 const_180_promoted_to_fp16 = const()[name = string("const_180_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_207 = transpose(perm = var_7237, x = var_7233)[name = string("transpose_47")]; tensor var_7250_cast_fp16 = mul(x = x_207, y = const_180_promoted_to_fp16)[name = string("op_7250_cast_fp16")]; bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; tensor input_259_cast_fp16 = concat(axis = var_7244, interleave = input_259_interleave_0, values = (x_207, var_7250_cast_fp16))[name = string("input_259_cast_fp16")]; tensor normed_361_axes_0 = const()[name = string("normed_361_axes_0"), val = tensor([-1])]; fp16 var_7242_to_fp16 = const()[name = string("op_7242_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_361_cast_fp16 = layer_norm(axes = normed_361_axes_0, epsilon = var_7242_to_fp16, x = input_259_cast_fp16)[name = string("normed_361_cast_fp16")]; tensor var_7255_split_sizes_0 = const()[name = string("op_7255_split_sizes_0"), val = tensor([640, 640])]; int32 var_7255_axis_0 = const()[name = string("op_7255_axis_0"), val = int32(-1)]; tensor var_7255_cast_fp16_0, tensor var_7255_cast_fp16_1 = split(axis = var_7255_axis_0, split_sizes = var_7255_split_sizes_0, x = normed_361_cast_fp16)[name = string("op_7255_cast_fp16")]; tensor var_7259_to_fp16 = const()[name = string("op_7259_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538011136)))]; tensor out_155_cast_fp16 = mul(x = var_7255_cast_fp16_0, y = var_7259_to_fp16)[name = string("out_155_cast_fp16")]; tensor x_209_cast_fp16 = add(x = x_203_cast_fp16, y = out_155_cast_fp16)[name = string("x_209_cast_fp16")]; int32 var_7273 = const()[name = string("op_7273"), val = int32(-1)]; fp16 const_182_promoted_to_fp16 = const()[name = string("const_182_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7279_cast_fp16 = mul(x = x_209_cast_fp16, y = const_182_promoted_to_fp16)[name = string("op_7279_cast_fp16")]; bool input_261_interleave_0 = const()[name = string("input_261_interleave_0"), val = bool(false)]; tensor input_261_cast_fp16 = concat(axis = var_7273, interleave = input_261_interleave_0, values = (x_209_cast_fp16, var_7279_cast_fp16))[name = string("input_261_cast_fp16")]; tensor normed_365_axes_0 = const()[name = string("normed_365_axes_0"), val = tensor([-1])]; fp16 var_7271_to_fp16 = const()[name = string("op_7271_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_365_cast_fp16 = layer_norm(axes = normed_365_axes_0, epsilon = var_7271_to_fp16, x = input_261_cast_fp16)[name = string("normed_365_cast_fp16")]; tensor var_7284_split_sizes_0 = const()[name = string("op_7284_split_sizes_0"), val = tensor([640, 640])]; int32 var_7284_axis_0 = const()[name = string("op_7284_axis_0"), val = int32(-1)]; tensor var_7284_cast_fp16_0, tensor var_7284_cast_fp16_1 = split(axis = var_7284_axis_0, split_sizes = var_7284_split_sizes_0, x = normed_365_cast_fp16)[name = string("op_7284_cast_fp16")]; tensor var_7288_to_fp16 = const()[name = string("op_7288_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538012480)))]; tensor out_157_cast_fp16 = mul(x = var_7284_cast_fp16_0, y = var_7288_to_fp16)[name = string("out_157_cast_fp16")]; tensor var_7302 = const()[name = string("op_7302"), val = tensor([0, 2, 1])]; tensor input_263_axes_0 = const()[name = string("input_263_axes_0"), val = tensor([2])]; tensor var_7303 = transpose(perm = var_7302, x = out_157_cast_fp16)[name = string("transpose_46")]; tensor input_263 = expand_dims(axes = input_263_axes_0, x = var_7303)[name = string("input_263")]; string var_7316_pad_type_0 = const()[name = string("op_7316_pad_type_0"), val = string("valid")]; tensor var_7316_strides_0 = const()[name = string("op_7316_strides_0"), val = tensor([1, 1])]; tensor var_7316_pad_0 = const()[name = string("op_7316_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7316_dilations_0 = const()[name = string("op_7316_dilations_0"), val = tensor([1, 1])]; int32 var_7316_groups_0 = const()[name = string("op_7316_groups_0"), val = int32(1)]; tensor var_7316 = conv(dilations = var_7316_dilations_0, groups = var_7316_groups_0, pad = var_7316_pad_0, pad_type = var_7316_pad_type_0, strides = var_7316_strides_0, weight = layers_13_self_attn_q_proj_weight, x = input_263)[name = string("op_7316")]; tensor var_7321 = const()[name = string("op_7321"), val = tensor([1, 4, 256, 1])]; tensor var_7322 = reshape(shape = var_7321, x = var_7316)[name = string("op_7322")]; tensor var_7327 = const()[name = string("op_7327"), val = tensor([0, 1, 3, 2])]; tensor var_7332 = const()[name = string("op_7332"), val = tensor([1, 4, 256])]; tensor q_79 = transpose(perm = var_7327, x = var_7322)[name = string("transpose_45")]; tensor x_213 = reshape(shape = var_7332, x = q_79)[name = string("x_213")]; int32 var_7339 = const()[name = string("op_7339"), val = int32(-1)]; fp16 const_184_promoted_to_fp16 = const()[name = string("const_184_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7345_cast_fp16 = mul(x = x_213, y = const_184_promoted_to_fp16)[name = string("op_7345_cast_fp16")]; bool input_265_interleave_0 = const()[name = string("input_265_interleave_0"), val = bool(false)]; tensor input_265_cast_fp16 = concat(axis = var_7339, interleave = input_265_interleave_0, values = (x_213, var_7345_cast_fp16))[name = string("input_265_cast_fp16")]; tensor normed_371_axes_0 = const()[name = string("normed_371_axes_0"), val = tensor([-1])]; fp16 var_7337_to_fp16 = const()[name = string("op_7337_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_371_cast_fp16 = layer_norm(axes = normed_371_axes_0, epsilon = var_7337_to_fp16, x = input_265_cast_fp16)[name = string("normed_371_cast_fp16")]; tensor var_7350_split_sizes_0 = const()[name = string("op_7350_split_sizes_0"), val = tensor([256, 256])]; int32 var_7350_axis_0 = const()[name = string("op_7350_axis_0"), val = int32(-1)]; tensor var_7350_cast_fp16_0, tensor var_7350_cast_fp16_1 = split(axis = var_7350_axis_0, split_sizes = var_7350_split_sizes_0, x = normed_371_cast_fp16)[name = string("op_7350_cast_fp16")]; tensor var_7354_to_fp16 = const()[name = string("op_7354_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538013824)))]; tensor out_159_cast_fp16 = mul(x = var_7350_cast_fp16_0, y = var_7354_to_fp16)[name = string("out_159_cast_fp16")]; tensor var_7361 = const()[name = string("op_7361"), val = tensor([1, 4, 1, 256])]; tensor q_81 = reshape(shape = var_7361, x = out_159_cast_fp16)[name = string("q_81")]; string var_7373_pad_type_0 = const()[name = string("op_7373_pad_type_0"), val = string("valid")]; tensor var_7373_strides_0 = const()[name = string("op_7373_strides_0"), val = tensor([1, 1])]; tensor var_7373_pad_0 = const()[name = string("op_7373_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7373_dilations_0 = const()[name = string("op_7373_dilations_0"), val = tensor([1, 1])]; int32 var_7373_groups_0 = const()[name = string("op_7373_groups_0"), val = int32(1)]; tensor var_7373 = conv(dilations = var_7373_dilations_0, groups = var_7373_groups_0, pad = var_7373_pad_0, pad_type = var_7373_pad_type_0, strides = var_7373_strides_0, weight = layers_13_self_attn_k_proj_weight, x = input_263)[name = string("op_7373")]; tensor var_7378 = const()[name = string("op_7378"), val = tensor([1, 1, 256, 1])]; tensor var_7379 = reshape(shape = var_7378, x = var_7373)[name = string("op_7379")]; tensor var_7384 = const()[name = string("op_7384"), val = tensor([0, 1, 3, 2])]; tensor var_7389 = const()[name = string("op_7389"), val = tensor([1, 1, 256])]; tensor k_79 = transpose(perm = var_7384, x = var_7379)[name = string("transpose_44")]; tensor x_215 = reshape(shape = var_7389, x = k_79)[name = string("x_215")]; int32 var_7396 = const()[name = string("op_7396"), val = int32(-1)]; fp16 const_186_promoted_to_fp16 = const()[name = string("const_186_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7402_cast_fp16 = mul(x = x_215, y = const_186_promoted_to_fp16)[name = string("op_7402_cast_fp16")]; bool input_267_interleave_0 = const()[name = string("input_267_interleave_0"), val = bool(false)]; tensor input_267_cast_fp16 = concat(axis = var_7396, interleave = input_267_interleave_0, values = (x_215, var_7402_cast_fp16))[name = string("input_267_cast_fp16")]; tensor normed_375_axes_0 = const()[name = string("normed_375_axes_0"), val = tensor([-1])]; fp16 var_7394_to_fp16 = const()[name = string("op_7394_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_375_cast_fp16 = layer_norm(axes = normed_375_axes_0, epsilon = var_7394_to_fp16, x = input_267_cast_fp16)[name = string("normed_375_cast_fp16")]; tensor var_7407_split_sizes_0 = const()[name = string("op_7407_split_sizes_0"), val = tensor([256, 256])]; int32 var_7407_axis_0 = const()[name = string("op_7407_axis_0"), val = int32(-1)]; tensor var_7407_cast_fp16_0, tensor var_7407_cast_fp16_1 = split(axis = var_7407_axis_0, split_sizes = var_7407_split_sizes_0, x = normed_375_cast_fp16)[name = string("op_7407_cast_fp16")]; tensor var_7411_to_fp16 = const()[name = string("op_7411_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538014400)))]; tensor out_161_cast_fp16 = mul(x = var_7407_cast_fp16_0, y = var_7411_to_fp16)[name = string("out_161_cast_fp16")]; tensor var_7418 = const()[name = string("op_7418"), val = tensor([1, 1, 1, 256])]; tensor k_81 = reshape(shape = var_7418, x = out_161_cast_fp16)[name = string("k_81")]; string var_7430_pad_type_0 = const()[name = string("op_7430_pad_type_0"), val = string("valid")]; tensor var_7430_strides_0 = const()[name = string("op_7430_strides_0"), val = tensor([1, 1])]; tensor var_7430_pad_0 = const()[name = string("op_7430_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7430_dilations_0 = const()[name = string("op_7430_dilations_0"), val = tensor([1, 1])]; int32 var_7430_groups_0 = const()[name = string("op_7430_groups_0"), val = int32(1)]; tensor var_7430 = conv(dilations = var_7430_dilations_0, groups = var_7430_groups_0, pad = var_7430_pad_0, pad_type = var_7430_pad_type_0, strides = var_7430_strides_0, weight = layers_13_self_attn_v_proj_weight, x = input_263)[name = string("op_7430")]; tensor var_7435 = const()[name = string("op_7435"), val = tensor([1, 1, 256, 1])]; tensor var_7436 = reshape(shape = var_7435, x = var_7430)[name = string("op_7436")]; tensor var_7441 = const()[name = string("op_7441"), val = tensor([0, 1, 3, 2])]; tensor var_7443 = mul(x = q_81, y = cos_1)[name = string("op_7443")]; tensor var_7444_split_sizes_0 = const()[name = string("op_7444_split_sizes_0"), val = tensor([128, 128])]; int32 var_7444_axis_0 = const()[name = string("op_7444_axis_0"), val = int32(-1)]; tensor var_7444_0, tensor var_7444_1 = split(axis = var_7444_axis_0, split_sizes = var_7444_split_sizes_0, x = q_81)[name = string("op_7444")]; fp16 const_188_promoted = const()[name = string("const_188_promoted"), val = fp16(-0x1p+0)]; tensor var_7446 = mul(x = var_7444_1, y = const_188_promoted)[name = string("op_7446")]; int32 var_7448 = const()[name = string("op_7448"), val = int32(-1)]; bool var_7449_interleave_0 = const()[name = string("op_7449_interleave_0"), val = bool(false)]; tensor var_7449 = concat(axis = var_7448, interleave = var_7449_interleave_0, values = (var_7446, var_7444_0))[name = string("op_7449")]; tensor var_7450 = mul(x = var_7449, y = sin_1)[name = string("op_7450")]; tensor q_83 = add(x = var_7443, y = var_7450)[name = string("q_83")]; tensor var_7453 = mul(x = k_81, y = cos_1)[name = string("op_7453")]; tensor var_7454_split_sizes_0 = const()[name = string("op_7454_split_sizes_0"), val = tensor([128, 128])]; int32 var_7454_axis_0 = const()[name = string("op_7454_axis_0"), val = int32(-1)]; tensor var_7454_0, tensor var_7454_1 = split(axis = var_7454_axis_0, split_sizes = var_7454_split_sizes_0, x = k_81)[name = string("op_7454")]; fp16 const_189_promoted = const()[name = string("const_189_promoted"), val = fp16(-0x1p+0)]; tensor var_7456 = mul(x = var_7454_1, y = const_189_promoted)[name = string("op_7456")]; int32 var_7458 = const()[name = string("op_7458"), val = int32(-1)]; bool var_7459_interleave_0 = const()[name = string("op_7459_interleave_0"), val = bool(false)]; tensor var_7459 = concat(axis = var_7458, interleave = var_7459_interleave_0, values = (var_7456, var_7454_0))[name = string("op_7459")]; tensor var_7460 = mul(x = var_7459, y = sin_1)[name = string("op_7460")]; tensor k_83 = add(x = var_7453, y = var_7460)[name = string("k_83")]; tensor var_7465_begin_0 = const()[name = string("op_7465_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_7465_end_0 = const()[name = string("op_7465_end_0"), val = tensor([14, 1, 2048, 256])]; tensor var_7465_end_mask_0 = const()[name = string("op_7465_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7465_squeeze_mask_0 = const()[name = string("op_7465_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_7465_cast_fp16 = slice_by_index(begin = var_7465_begin_0, end = var_7465_end_0, end_mask = var_7465_end_mask_0, squeeze_mask = var_7465_squeeze_mask_0, x = coreml_update_state_61)[name = string("op_7465_cast_fp16")]; tensor K_cache_27_axes_0 = const()[name = string("K_cache_27_axes_0"), val = tensor([0])]; tensor K_cache_27_cast_fp16 = expand_dims(axes = K_cache_27_axes_0, x = var_7465_cast_fp16)[name = string("K_cache_27_cast_fp16")]; tensor var_7470_begin_0 = const()[name = string("op_7470_begin_0"), val = tensor([31, 0, 0, 0])]; tensor var_7470_end_0 = const()[name = string("op_7470_end_0"), val = tensor([32, 1, 2048, 256])]; tensor var_7470_end_mask_0 = const()[name = string("op_7470_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7470_squeeze_mask_0 = const()[name = string("op_7470_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_7470_cast_fp16 = slice_by_index(begin = var_7470_begin_0, end = var_7470_end_0, end_mask = var_7470_end_mask_0, squeeze_mask = var_7470_squeeze_mask_0, x = coreml_update_state_61)[name = string("op_7470_cast_fp16")]; tensor V_cache_27_axes_0 = const()[name = string("V_cache_27_axes_0"), val = tensor([0])]; tensor V_cache_27_cast_fp16 = expand_dims(axes = V_cache_27_axes_0, x = var_7470_cast_fp16)[name = string("V_cache_27_cast_fp16")]; tensor k_broadcast_27_reps_0 = const()[name = string("k_broadcast_27_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_27 = tile(reps = k_broadcast_27_reps_0, x = k_83)[name = string("k_broadcast_27")]; tensor v_broadcast_27_reps_0 = const()[name = string("v_broadcast_27_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_27 = transpose(perm = var_7441, x = var_7436)[name = string("transpose_43")]; tensor v_broadcast_27 = tile(reps = v_broadcast_27_reps_0, x = v_27)[name = string("v_broadcast_27")]; tensor var_7478_cast_fp16 = mul(x = K_cache_27_cast_fp16, y = var_1120_cast_fp16)[name = string("op_7478_cast_fp16")]; tensor var_7479_cast_fp16 = mul(x = k_broadcast_27, y = update_mask)[name = string("op_7479_cast_fp16")]; tensor K_new_27_cast_fp16 = add(x = var_7478_cast_fp16, y = var_7479_cast_fp16)[name = string("K_new_27_cast_fp16")]; tensor var_7485_cast_fp16 = mul(x = V_cache_27_cast_fp16, y = var_1120_cast_fp16)[name = string("op_7485_cast_fp16")]; tensor var_7486_cast_fp16 = mul(x = v_broadcast_27, y = update_mask)[name = string("op_7486_cast_fp16")]; tensor V_new_27_cast_fp16 = add(x = var_7485_cast_fp16, y = var_7486_cast_fp16)[name = string("V_new_27_cast_fp16")]; tensor var_7490_axes_0 = const()[name = string("op_7490_axes_0"), val = tensor([0])]; tensor var_7490_cast_fp16 = squeeze(axes = var_7490_axes_0, x = K_new_27_cast_fp16)[name = string("op_7490_cast_fp16")]; tensor concat_52 = const()[name = string("concat_52"), val = tensor([13, 0, 0, 0])]; tensor concat_53 = const()[name = string("concat_53"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_27_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_27_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_27_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_52, begin_mask = kv_cache_0_internal_tensor_assign_27_begin_mask_0, end = concat_53, end_mask = kv_cache_0_internal_tensor_assign_27_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_27_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_27_stride_0, update = var_7490_cast_fp16, x = coreml_update_state_61)[name = string("kv_cache_0_internal_tensor_assign_27_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_27_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_62_write_state")]; tensor coreml_update_state_62 = read_state(input = kv_cache_0)[name = string("coreml_update_state_62")]; tensor var_7497_axes_0 = const()[name = string("op_7497_axes_0"), val = tensor([0])]; tensor var_7497_cast_fp16 = squeeze(axes = var_7497_axes_0, x = V_new_27_cast_fp16)[name = string("op_7497_cast_fp16")]; tensor concat_54 = const()[name = string("concat_54"), val = tensor([31, 0, 0, 0])]; tensor concat_55 = const()[name = string("concat_55"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_28_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_28_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_28_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_54, begin_mask = kv_cache_0_internal_tensor_assign_28_begin_mask_0, end = concat_55, end_mask = kv_cache_0_internal_tensor_assign_28_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_28_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_28_stride_0, update = var_7497_cast_fp16, x = coreml_update_state_62)[name = string("kv_cache_0_internal_tensor_assign_28_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_28_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_63_write_state")]; tensor coreml_update_state_63 = read_state(input = kv_cache_0)[name = string("coreml_update_state_63")]; tensor hidden_states_107_axes_0 = const()[name = string("hidden_states_107_axes_0"), val = tensor([2])]; tensor hidden_states_107_cast_fp16 = expand_dims(axes = hidden_states_107_axes_0, x = K_new_27_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; tensor var_7510 = const()[name = string("op_7510"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_109_cast_fp16 = tile(reps = var_7510, x = hidden_states_107_cast_fp16)[name = string("hidden_states_109_cast_fp16")]; tensor var_7516 = const()[name = string("op_7516"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_27_cast_fp16 = reshape(shape = var_7516, x = hidden_states_109_cast_fp16)[name = string("K_expanded_27_cast_fp16")]; tensor hidden_states_111_axes_0 = const()[name = string("hidden_states_111_axes_0"), val = tensor([2])]; tensor hidden_states_111_cast_fp16 = expand_dims(axes = hidden_states_111_axes_0, x = V_new_27_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; tensor var_7525 = const()[name = string("op_7525"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_113_cast_fp16 = tile(reps = var_7525, x = hidden_states_111_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; tensor var_7531 = const()[name = string("op_7531"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_27_cast_fp16 = reshape(shape = var_7531, x = hidden_states_113_cast_fp16)[name = string("V_expanded_27_cast_fp16")]; bool var_7546_transpose_x_1 = const()[name = string("op_7546_transpose_x_1"), val = bool(false)]; bool var_7546_transpose_y_1 = const()[name = string("op_7546_transpose_y_1"), val = bool(true)]; tensor var_7546_cast_fp16 = matmul(transpose_x = var_7546_transpose_x_1, transpose_y = var_7546_transpose_y_1, x = q_83, y = K_expanded_27_cast_fp16)[name = string("op_7546_cast_fp16")]; fp16 var_7547_to_fp16 = const()[name = string("op_7547_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_79_cast_fp16 = mul(x = var_7546_cast_fp16, y = var_7547_to_fp16)[name = string("attn_weights_79_cast_fp16")]; tensor attn_weights_81_cast_fp16 = add(x = attn_weights_79_cast_fp16, y = causal_mask)[name = string("attn_weights_81_cast_fp16")]; int32 var_7556 = const()[name = string("op_7556"), val = int32(-1)]; tensor var_7558_cast_fp16 = softmax(axis = var_7556, x = attn_weights_81_cast_fp16)[name = string("op_7558_cast_fp16")]; bool var_7574_transpose_x_0 = const()[name = string("op_7574_transpose_x_0"), val = bool(false)]; bool var_7574_transpose_y_0 = const()[name = string("op_7574_transpose_y_0"), val = bool(false)]; tensor var_7574_cast_fp16 = matmul(transpose_x = var_7574_transpose_x_0, transpose_y = var_7574_transpose_y_0, x = var_7558_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_7574_cast_fp16")]; tensor var_7584 = const()[name = string("op_7584"), val = tensor([0, 2, 1, 3])]; tensor var_7591 = const()[name = string("op_7591"), val = tensor([1, 1, -1])]; tensor var_7585 = transpose(perm = var_7584, x = var_7574_cast_fp16)[name = string("transpose_42")]; tensor attn_output_81 = reshape(shape = var_7591, x = var_7585)[name = string("attn_output_81")]; tensor var_7596 = const()[name = string("op_7596"), val = tensor([0, 2, 1])]; tensor squeeze_13 = const()[name = string("squeeze_13"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538014976)))]; string var_7612_pad_type_0 = const()[name = string("op_7612_pad_type_0"), val = string("valid")]; int32 var_7612_groups_0 = const()[name = string("op_7612_groups_0"), val = int32(1)]; tensor var_7612_strides_0 = const()[name = string("op_7612_strides_0"), val = tensor([1])]; tensor var_7612_pad_0 = const()[name = string("op_7612_pad_0"), val = tensor([0, 0])]; tensor var_7612_dilations_0 = const()[name = string("op_7612_dilations_0"), val = tensor([1])]; tensor var_7597 = transpose(perm = var_7596, x = attn_output_81)[name = string("transpose_41")]; tensor var_7612 = conv(dilations = var_7612_dilations_0, groups = var_7612_groups_0, pad = var_7612_pad_0, pad_type = var_7612_pad_type_0, strides = var_7612_strides_0, weight = squeeze_13, x = var_7597)[name = string("op_7612")]; tensor var_7616 = const()[name = string("op_7616"), val = tensor([0, 2, 1])]; int32 var_7623 = const()[name = string("op_7623"), val = int32(-1)]; fp16 const_190_promoted_to_fp16 = const()[name = string("const_190_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_217 = transpose(perm = var_7616, x = var_7612)[name = string("transpose_40")]; tensor var_7629_cast_fp16 = mul(x = x_217, y = const_190_promoted_to_fp16)[name = string("op_7629_cast_fp16")]; bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; tensor input_271_cast_fp16 = concat(axis = var_7623, interleave = input_271_interleave_0, values = (x_217, var_7629_cast_fp16))[name = string("input_271_cast_fp16")]; tensor normed_379_axes_0 = const()[name = string("normed_379_axes_0"), val = tensor([-1])]; fp16 var_7621_to_fp16 = const()[name = string("op_7621_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_379_cast_fp16 = layer_norm(axes = normed_379_axes_0, epsilon = var_7621_to_fp16, x = input_271_cast_fp16)[name = string("normed_379_cast_fp16")]; tensor var_7634_split_sizes_0 = const()[name = string("op_7634_split_sizes_0"), val = tensor([640, 640])]; int32 var_7634_axis_0 = const()[name = string("op_7634_axis_0"), val = int32(-1)]; tensor var_7634_cast_fp16_0, tensor var_7634_cast_fp16_1 = split(axis = var_7634_axis_0, split_sizes = var_7634_split_sizes_0, x = normed_379_cast_fp16)[name = string("op_7634_cast_fp16")]; tensor var_7638_to_fp16 = const()[name = string("op_7638_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539325760)))]; tensor out_163_cast_fp16 = mul(x = var_7634_cast_fp16_0, y = var_7638_to_fp16)[name = string("out_163_cast_fp16")]; tensor x_219_cast_fp16 = add(x = x_209_cast_fp16, y = out_163_cast_fp16)[name = string("x_219_cast_fp16")]; int32 var_7652 = const()[name = string("op_7652"), val = int32(-1)]; fp16 const_192_promoted_to_fp16 = const()[name = string("const_192_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7658_cast_fp16 = mul(x = x_219_cast_fp16, y = const_192_promoted_to_fp16)[name = string("op_7658_cast_fp16")]; bool input_273_interleave_0 = const()[name = string("input_273_interleave_0"), val = bool(false)]; tensor input_273_cast_fp16 = concat(axis = var_7652, interleave = input_273_interleave_0, values = (x_219_cast_fp16, var_7658_cast_fp16))[name = string("input_273_cast_fp16")]; tensor normed_383_axes_0 = const()[name = string("normed_383_axes_0"), val = tensor([-1])]; fp16 var_7650_to_fp16 = const()[name = string("op_7650_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_383_cast_fp16 = layer_norm(axes = normed_383_axes_0, epsilon = var_7650_to_fp16, x = input_273_cast_fp16)[name = string("normed_383_cast_fp16")]; tensor var_7663_split_sizes_0 = const()[name = string("op_7663_split_sizes_0"), val = tensor([640, 640])]; int32 var_7663_axis_0 = const()[name = string("op_7663_axis_0"), val = int32(-1)]; tensor var_7663_cast_fp16_0, tensor var_7663_cast_fp16_1 = split(axis = var_7663_axis_0, split_sizes = var_7663_split_sizes_0, x = normed_383_cast_fp16)[name = string("op_7663_cast_fp16")]; tensor var_7667_to_fp16 = const()[name = string("op_7667_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539327104)))]; tensor out_165_cast_fp16 = mul(x = var_7663_cast_fp16_0, y = var_7667_to_fp16)[name = string("out_165_cast_fp16")]; tensor var_7681 = const()[name = string("op_7681"), val = tensor([0, 2, 1])]; tensor input_275_axes_0 = const()[name = string("input_275_axes_0"), val = tensor([2])]; tensor var_7682 = transpose(perm = var_7681, x = out_165_cast_fp16)[name = string("transpose_39")]; tensor input_275 = expand_dims(axes = input_275_axes_0, x = var_7682)[name = string("input_275")]; string gate_53_pad_type_0 = const()[name = string("gate_53_pad_type_0"), val = string("valid")]; tensor gate_53_strides_0 = const()[name = string("gate_53_strides_0"), val = tensor([1, 1])]; tensor gate_53_pad_0 = const()[name = string("gate_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_53_dilations_0 = const()[name = string("gate_53_dilations_0"), val = tensor([1, 1])]; int32 gate_53_groups_0 = const()[name = string("gate_53_groups_0"), val = int32(1)]; tensor gate_53 = conv(dilations = gate_53_dilations_0, groups = gate_53_groups_0, pad = gate_53_pad_0, pad_type = gate_53_pad_type_0, strides = gate_53_strides_0, weight = layers_13_mlp_gate_proj_weight, x = input_275)[name = string("gate_53")]; string up_27_pad_type_0 = const()[name = string("up_27_pad_type_0"), val = string("valid")]; tensor up_27_strides_0 = const()[name = string("up_27_strides_0"), val = tensor([1, 1])]; tensor up_27_pad_0 = const()[name = string("up_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_27_dilations_0 = const()[name = string("up_27_dilations_0"), val = tensor([1, 1])]; int32 up_27_groups_0 = const()[name = string("up_27_groups_0"), val = int32(1)]; tensor up_27 = conv(dilations = up_27_dilations_0, groups = up_27_groups_0, pad = up_27_pad_0, pad_type = up_27_pad_type_0, strides = up_27_strides_0, weight = layers_13_mlp_up_proj_weight, x = input_275)[name = string("up_27")]; string gate_55_mode_0 = const()[name = string("gate_55_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_55 = gelu(mode = gate_55_mode_0, x = gate_53)[name = string("gate_55")]; tensor input_277 = mul(x = gate_55, y = up_27)[name = string("input_277")]; string mlp_out_53_pad_type_0 = const()[name = string("mlp_out_53_pad_type_0"), val = string("valid")]; tensor mlp_out_53_strides_0 = const()[name = string("mlp_out_53_strides_0"), val = tensor([1, 1])]; tensor mlp_out_53_pad_0 = const()[name = string("mlp_out_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_53_dilations_0 = const()[name = string("mlp_out_53_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_53_groups_0 = const()[name = string("mlp_out_53_groups_0"), val = int32(1)]; tensor mlp_out_53 = conv(dilations = mlp_out_53_dilations_0, groups = mlp_out_53_groups_0, pad = mlp_out_53_pad_0, pad_type = mlp_out_53_pad_type_0, strides = mlp_out_53_strides_0, weight = layers_13_mlp_down_proj_weight, x = input_277)[name = string("mlp_out_53")]; tensor var_7722_axes_0 = const()[name = string("op_7722_axes_0"), val = tensor([2])]; tensor var_7722 = squeeze(axes = var_7722_axes_0, x = mlp_out_53)[name = string("op_7722")]; tensor var_7726 = const()[name = string("op_7726"), val = tensor([0, 2, 1])]; int32 var_7733 = const()[name = string("op_7733"), val = int32(-1)]; fp16 const_194_promoted_to_fp16 = const()[name = string("const_194_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_223 = transpose(perm = var_7726, x = var_7722)[name = string("transpose_38")]; tensor var_7739_cast_fp16 = mul(x = x_223, y = const_194_promoted_to_fp16)[name = string("op_7739_cast_fp16")]; bool input_279_interleave_0 = const()[name = string("input_279_interleave_0"), val = bool(false)]; tensor input_279_cast_fp16 = concat(axis = var_7733, interleave = input_279_interleave_0, values = (x_223, var_7739_cast_fp16))[name = string("input_279_cast_fp16")]; tensor normed_389_axes_0 = const()[name = string("normed_389_axes_0"), val = tensor([-1])]; fp16 var_7731_to_fp16 = const()[name = string("op_7731_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_389_cast_fp16 = layer_norm(axes = normed_389_axes_0, epsilon = var_7731_to_fp16, x = input_279_cast_fp16)[name = string("normed_389_cast_fp16")]; tensor var_7744_split_sizes_0 = const()[name = string("op_7744_split_sizes_0"), val = tensor([640, 640])]; int32 var_7744_axis_0 = const()[name = string("op_7744_axis_0"), val = int32(-1)]; tensor var_7744_cast_fp16_0, tensor var_7744_cast_fp16_1 = split(axis = var_7744_axis_0, split_sizes = var_7744_split_sizes_0, x = normed_389_cast_fp16)[name = string("op_7744_cast_fp16")]; tensor var_7748_to_fp16 = const()[name = string("op_7748_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539328448)))]; tensor out_167_cast_fp16 = mul(x = var_7744_cast_fp16_0, y = var_7748_to_fp16)[name = string("out_167_cast_fp16")]; tensor x_225_cast_fp16 = add(x = x_219_cast_fp16, y = out_167_cast_fp16)[name = string("x_225_cast_fp16")]; int32 var_7762 = const()[name = string("op_7762"), val = int32(-1)]; fp16 const_196_promoted_to_fp16 = const()[name = string("const_196_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7768_cast_fp16 = mul(x = x_225_cast_fp16, y = const_196_promoted_to_fp16)[name = string("op_7768_cast_fp16")]; bool input_281_interleave_0 = const()[name = string("input_281_interleave_0"), val = bool(false)]; tensor input_281_cast_fp16 = concat(axis = var_7762, interleave = input_281_interleave_0, values = (x_225_cast_fp16, var_7768_cast_fp16))[name = string("input_281_cast_fp16")]; tensor normed_393_axes_0 = const()[name = string("normed_393_axes_0"), val = tensor([-1])]; fp16 var_7760_to_fp16 = const()[name = string("op_7760_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_393_cast_fp16 = layer_norm(axes = normed_393_axes_0, epsilon = var_7760_to_fp16, x = input_281_cast_fp16)[name = string("normed_393_cast_fp16")]; tensor var_7773_split_sizes_0 = const()[name = string("op_7773_split_sizes_0"), val = tensor([640, 640])]; int32 var_7773_axis_0 = const()[name = string("op_7773_axis_0"), val = int32(-1)]; tensor var_7773_cast_fp16_0, tensor var_7773_cast_fp16_1 = split(axis = var_7773_axis_0, split_sizes = var_7773_split_sizes_0, x = normed_393_cast_fp16)[name = string("op_7773_cast_fp16")]; tensor var_7777_to_fp16 = const()[name = string("op_7777_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539329792)))]; tensor out_169_cast_fp16 = mul(x = var_7773_cast_fp16_0, y = var_7777_to_fp16)[name = string("out_169_cast_fp16")]; tensor var_7791 = const()[name = string("op_7791"), val = tensor([0, 2, 1])]; tensor input_283_axes_0 = const()[name = string("input_283_axes_0"), val = tensor([2])]; tensor var_7792 = transpose(perm = var_7791, x = out_169_cast_fp16)[name = string("transpose_37")]; tensor input_283 = expand_dims(axes = input_283_axes_0, x = var_7792)[name = string("input_283")]; string var_7805_pad_type_0 = const()[name = string("op_7805_pad_type_0"), val = string("valid")]; tensor var_7805_strides_0 = const()[name = string("op_7805_strides_0"), val = tensor([1, 1])]; tensor var_7805_pad_0 = const()[name = string("op_7805_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7805_dilations_0 = const()[name = string("op_7805_dilations_0"), val = tensor([1, 1])]; int32 var_7805_groups_0 = const()[name = string("op_7805_groups_0"), val = int32(1)]; tensor var_7805 = conv(dilations = var_7805_dilations_0, groups = var_7805_groups_0, pad = var_7805_pad_0, pad_type = var_7805_pad_type_0, strides = var_7805_strides_0, weight = layers_14_self_attn_q_proj_weight, x = input_283)[name = string("op_7805")]; tensor var_7810 = const()[name = string("op_7810"), val = tensor([1, 4, 256, 1])]; tensor var_7811 = reshape(shape = var_7810, x = var_7805)[name = string("op_7811")]; tensor var_7816 = const()[name = string("op_7816"), val = tensor([0, 1, 3, 2])]; tensor var_7821 = const()[name = string("op_7821"), val = tensor([1, 4, 256])]; tensor q_85 = transpose(perm = var_7816, x = var_7811)[name = string("transpose_36")]; tensor x_229 = reshape(shape = var_7821, x = q_85)[name = string("x_229")]; int32 var_7828 = const()[name = string("op_7828"), val = int32(-1)]; fp16 const_198_promoted_to_fp16 = const()[name = string("const_198_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7834_cast_fp16 = mul(x = x_229, y = const_198_promoted_to_fp16)[name = string("op_7834_cast_fp16")]; bool input_285_interleave_0 = const()[name = string("input_285_interleave_0"), val = bool(false)]; tensor input_285_cast_fp16 = concat(axis = var_7828, interleave = input_285_interleave_0, values = (x_229, var_7834_cast_fp16))[name = string("input_285_cast_fp16")]; tensor normed_399_axes_0 = const()[name = string("normed_399_axes_0"), val = tensor([-1])]; fp16 var_7826_to_fp16 = const()[name = string("op_7826_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_399_cast_fp16 = layer_norm(axes = normed_399_axes_0, epsilon = var_7826_to_fp16, x = input_285_cast_fp16)[name = string("normed_399_cast_fp16")]; tensor var_7839_split_sizes_0 = const()[name = string("op_7839_split_sizes_0"), val = tensor([256, 256])]; int32 var_7839_axis_0 = const()[name = string("op_7839_axis_0"), val = int32(-1)]; tensor var_7839_cast_fp16_0, tensor var_7839_cast_fp16_1 = split(axis = var_7839_axis_0, split_sizes = var_7839_split_sizes_0, x = normed_399_cast_fp16)[name = string("op_7839_cast_fp16")]; tensor var_7843_to_fp16 = const()[name = string("op_7843_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539331136)))]; tensor out_171_cast_fp16 = mul(x = var_7839_cast_fp16_0, y = var_7843_to_fp16)[name = string("out_171_cast_fp16")]; tensor var_7850 = const()[name = string("op_7850"), val = tensor([1, 4, 1, 256])]; tensor q_87 = reshape(shape = var_7850, x = out_171_cast_fp16)[name = string("q_87")]; string var_7862_pad_type_0 = const()[name = string("op_7862_pad_type_0"), val = string("valid")]; tensor var_7862_strides_0 = const()[name = string("op_7862_strides_0"), val = tensor([1, 1])]; tensor var_7862_pad_0 = const()[name = string("op_7862_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7862_dilations_0 = const()[name = string("op_7862_dilations_0"), val = tensor([1, 1])]; int32 var_7862_groups_0 = const()[name = string("op_7862_groups_0"), val = int32(1)]; tensor var_7862 = conv(dilations = var_7862_dilations_0, groups = var_7862_groups_0, pad = var_7862_pad_0, pad_type = var_7862_pad_type_0, strides = var_7862_strides_0, weight = layers_14_self_attn_k_proj_weight, x = input_283)[name = string("op_7862")]; tensor var_7867 = const()[name = string("op_7867"), val = tensor([1, 1, 256, 1])]; tensor var_7868 = reshape(shape = var_7867, x = var_7862)[name = string("op_7868")]; tensor var_7873 = const()[name = string("op_7873"), val = tensor([0, 1, 3, 2])]; tensor var_7878 = const()[name = string("op_7878"), val = tensor([1, 1, 256])]; tensor k_85 = transpose(perm = var_7873, x = var_7868)[name = string("transpose_35")]; tensor x_231 = reshape(shape = var_7878, x = k_85)[name = string("x_231")]; int32 var_7885 = const()[name = string("op_7885"), val = int32(-1)]; fp16 const_200_promoted_to_fp16 = const()[name = string("const_200_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7891_cast_fp16 = mul(x = x_231, y = const_200_promoted_to_fp16)[name = string("op_7891_cast_fp16")]; bool input_287_interleave_0 = const()[name = string("input_287_interleave_0"), val = bool(false)]; tensor input_287_cast_fp16 = concat(axis = var_7885, interleave = input_287_interleave_0, values = (x_231, var_7891_cast_fp16))[name = string("input_287_cast_fp16")]; tensor normed_403_axes_0 = const()[name = string("normed_403_axes_0"), val = tensor([-1])]; fp16 var_7883_to_fp16 = const()[name = string("op_7883_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_403_cast_fp16 = layer_norm(axes = normed_403_axes_0, epsilon = var_7883_to_fp16, x = input_287_cast_fp16)[name = string("normed_403_cast_fp16")]; tensor var_7896_split_sizes_0 = const()[name = string("op_7896_split_sizes_0"), val = tensor([256, 256])]; int32 var_7896_axis_0 = const()[name = string("op_7896_axis_0"), val = int32(-1)]; tensor var_7896_cast_fp16_0, tensor var_7896_cast_fp16_1 = split(axis = var_7896_axis_0, split_sizes = var_7896_split_sizes_0, x = normed_403_cast_fp16)[name = string("op_7896_cast_fp16")]; tensor var_7900_to_fp16 = const()[name = string("op_7900_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539331712)))]; tensor out_173_cast_fp16 = mul(x = var_7896_cast_fp16_0, y = var_7900_to_fp16)[name = string("out_173_cast_fp16")]; tensor var_7907 = const()[name = string("op_7907"), val = tensor([1, 1, 1, 256])]; tensor k_87 = reshape(shape = var_7907, x = out_173_cast_fp16)[name = string("k_87")]; string var_7919_pad_type_0 = const()[name = string("op_7919_pad_type_0"), val = string("valid")]; tensor var_7919_strides_0 = const()[name = string("op_7919_strides_0"), val = tensor([1, 1])]; tensor var_7919_pad_0 = const()[name = string("op_7919_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7919_dilations_0 = const()[name = string("op_7919_dilations_0"), val = tensor([1, 1])]; int32 var_7919_groups_0 = const()[name = string("op_7919_groups_0"), val = int32(1)]; tensor var_7919 = conv(dilations = var_7919_dilations_0, groups = var_7919_groups_0, pad = var_7919_pad_0, pad_type = var_7919_pad_type_0, strides = var_7919_strides_0, weight = layers_14_self_attn_v_proj_weight, x = input_283)[name = string("op_7919")]; tensor var_7924 = const()[name = string("op_7924"), val = tensor([1, 1, 256, 1])]; tensor var_7925 = reshape(shape = var_7924, x = var_7919)[name = string("op_7925")]; tensor var_7930 = const()[name = string("op_7930"), val = tensor([0, 1, 3, 2])]; tensor var_7932 = mul(x = q_87, y = cos_1)[name = string("op_7932")]; tensor var_7933_split_sizes_0 = const()[name = string("op_7933_split_sizes_0"), val = tensor([128, 128])]; int32 var_7933_axis_0 = const()[name = string("op_7933_axis_0"), val = int32(-1)]; tensor var_7933_0, tensor var_7933_1 = split(axis = var_7933_axis_0, split_sizes = var_7933_split_sizes_0, x = q_87)[name = string("op_7933")]; fp16 const_202_promoted = const()[name = string("const_202_promoted"), val = fp16(-0x1p+0)]; tensor var_7935 = mul(x = var_7933_1, y = const_202_promoted)[name = string("op_7935")]; int32 var_7937 = const()[name = string("op_7937"), val = int32(-1)]; bool var_7938_interleave_0 = const()[name = string("op_7938_interleave_0"), val = bool(false)]; tensor var_7938 = concat(axis = var_7937, interleave = var_7938_interleave_0, values = (var_7935, var_7933_0))[name = string("op_7938")]; tensor var_7939 = mul(x = var_7938, y = sin_1)[name = string("op_7939")]; tensor q_89 = add(x = var_7932, y = var_7939)[name = string("q_89")]; tensor var_7942 = mul(x = k_87, y = cos_1)[name = string("op_7942")]; tensor var_7943_split_sizes_0 = const()[name = string("op_7943_split_sizes_0"), val = tensor([128, 128])]; int32 var_7943_axis_0 = const()[name = string("op_7943_axis_0"), val = int32(-1)]; tensor var_7943_0, tensor var_7943_1 = split(axis = var_7943_axis_0, split_sizes = var_7943_split_sizes_0, x = k_87)[name = string("op_7943")]; fp16 const_203_promoted = const()[name = string("const_203_promoted"), val = fp16(-0x1p+0)]; tensor var_7945 = mul(x = var_7943_1, y = const_203_promoted)[name = string("op_7945")]; int32 var_7947 = const()[name = string("op_7947"), val = int32(-1)]; bool var_7948_interleave_0 = const()[name = string("op_7948_interleave_0"), val = bool(false)]; tensor var_7948 = concat(axis = var_7947, interleave = var_7948_interleave_0, values = (var_7945, var_7943_0))[name = string("op_7948")]; tensor var_7949 = mul(x = var_7948, y = sin_1)[name = string("op_7949")]; tensor k_89 = add(x = var_7942, y = var_7949)[name = string("k_89")]; tensor var_7954_begin_0 = const()[name = string("op_7954_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_7954_end_0 = const()[name = string("op_7954_end_0"), val = tensor([15, 1, 2048, 256])]; tensor var_7954_end_mask_0 = const()[name = string("op_7954_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7954_squeeze_mask_0 = const()[name = string("op_7954_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_7954_cast_fp16 = slice_by_index(begin = var_7954_begin_0, end = var_7954_end_0, end_mask = var_7954_end_mask_0, squeeze_mask = var_7954_squeeze_mask_0, x = coreml_update_state_63)[name = string("op_7954_cast_fp16")]; tensor K_cache_29_axes_0 = const()[name = string("K_cache_29_axes_0"), val = tensor([0])]; tensor K_cache_29_cast_fp16 = expand_dims(axes = K_cache_29_axes_0, x = var_7954_cast_fp16)[name = string("K_cache_29_cast_fp16")]; tensor var_7959_begin_0 = const()[name = string("op_7959_begin_0"), val = tensor([32, 0, 0, 0])]; tensor var_7959_end_0 = const()[name = string("op_7959_end_0"), val = tensor([33, 1, 2048, 256])]; tensor var_7959_end_mask_0 = const()[name = string("op_7959_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7959_squeeze_mask_0 = const()[name = string("op_7959_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_7959_cast_fp16 = slice_by_index(begin = var_7959_begin_0, end = var_7959_end_0, end_mask = var_7959_end_mask_0, squeeze_mask = var_7959_squeeze_mask_0, x = coreml_update_state_63)[name = string("op_7959_cast_fp16")]; tensor V_cache_29_axes_0 = const()[name = string("V_cache_29_axes_0"), val = tensor([0])]; tensor V_cache_29_cast_fp16 = expand_dims(axes = V_cache_29_axes_0, x = var_7959_cast_fp16)[name = string("V_cache_29_cast_fp16")]; tensor k_broadcast_29_reps_0 = const()[name = string("k_broadcast_29_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_29 = tile(reps = k_broadcast_29_reps_0, x = k_89)[name = string("k_broadcast_29")]; tensor v_broadcast_29_reps_0 = const()[name = string("v_broadcast_29_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_29 = transpose(perm = var_7930, x = var_7925)[name = string("transpose_34")]; tensor v_broadcast_29 = tile(reps = v_broadcast_29_reps_0, x = v_29)[name = string("v_broadcast_29")]; tensor var_7967_cast_fp16 = mul(x = K_cache_29_cast_fp16, y = var_1120_cast_fp16)[name = string("op_7967_cast_fp16")]; tensor var_7968_cast_fp16 = mul(x = k_broadcast_29, y = update_mask)[name = string("op_7968_cast_fp16")]; tensor K_new_29_cast_fp16 = add(x = var_7967_cast_fp16, y = var_7968_cast_fp16)[name = string("K_new_29_cast_fp16")]; tensor var_7974_cast_fp16 = mul(x = V_cache_29_cast_fp16, y = var_1120_cast_fp16)[name = string("op_7974_cast_fp16")]; tensor var_7975_cast_fp16 = mul(x = v_broadcast_29, y = update_mask)[name = string("op_7975_cast_fp16")]; tensor V_new_29_cast_fp16 = add(x = var_7974_cast_fp16, y = var_7975_cast_fp16)[name = string("V_new_29_cast_fp16")]; tensor var_7979_axes_0 = const()[name = string("op_7979_axes_0"), val = tensor([0])]; tensor var_7979_cast_fp16 = squeeze(axes = var_7979_axes_0, x = K_new_29_cast_fp16)[name = string("op_7979_cast_fp16")]; tensor concat_56 = const()[name = string("concat_56"), val = tensor([14, 0, 0, 0])]; tensor concat_57 = const()[name = string("concat_57"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_29_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_29_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_29_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_56, begin_mask = kv_cache_0_internal_tensor_assign_29_begin_mask_0, end = concat_57, end_mask = kv_cache_0_internal_tensor_assign_29_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_29_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_29_stride_0, update = var_7979_cast_fp16, x = coreml_update_state_63)[name = string("kv_cache_0_internal_tensor_assign_29_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_29_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_64_write_state")]; tensor coreml_update_state_64 = read_state(input = kv_cache_0)[name = string("coreml_update_state_64")]; tensor var_7986_axes_0 = const()[name = string("op_7986_axes_0"), val = tensor([0])]; tensor var_7986_cast_fp16 = squeeze(axes = var_7986_axes_0, x = V_new_29_cast_fp16)[name = string("op_7986_cast_fp16")]; tensor concat_58 = const()[name = string("concat_58"), val = tensor([32, 0, 0, 0])]; tensor concat_59 = const()[name = string("concat_59"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_30_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_30_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_30_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_58, begin_mask = kv_cache_0_internal_tensor_assign_30_begin_mask_0, end = concat_59, end_mask = kv_cache_0_internal_tensor_assign_30_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_30_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_30_stride_0, update = var_7986_cast_fp16, x = coreml_update_state_64)[name = string("kv_cache_0_internal_tensor_assign_30_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_30_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_65_write_state")]; tensor coreml_update_state_65 = read_state(input = kv_cache_0)[name = string("coreml_update_state_65")]; tensor hidden_states_115_axes_0 = const()[name = string("hidden_states_115_axes_0"), val = tensor([2])]; tensor hidden_states_115_cast_fp16 = expand_dims(axes = hidden_states_115_axes_0, x = K_new_29_cast_fp16)[name = string("hidden_states_115_cast_fp16")]; tensor var_7999 = const()[name = string("op_7999"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_117_cast_fp16 = tile(reps = var_7999, x = hidden_states_115_cast_fp16)[name = string("hidden_states_117_cast_fp16")]; tensor var_8005 = const()[name = string("op_8005"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_29_cast_fp16 = reshape(shape = var_8005, x = hidden_states_117_cast_fp16)[name = string("K_expanded_29_cast_fp16")]; tensor hidden_states_119_axes_0 = const()[name = string("hidden_states_119_axes_0"), val = tensor([2])]; tensor hidden_states_119_cast_fp16 = expand_dims(axes = hidden_states_119_axes_0, x = V_new_29_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; tensor var_8014 = const()[name = string("op_8014"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_121_cast_fp16 = tile(reps = var_8014, x = hidden_states_119_cast_fp16)[name = string("hidden_states_121_cast_fp16")]; tensor var_8020 = const()[name = string("op_8020"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_29_cast_fp16 = reshape(shape = var_8020, x = hidden_states_121_cast_fp16)[name = string("V_expanded_29_cast_fp16")]; bool var_8035_transpose_x_1 = const()[name = string("op_8035_transpose_x_1"), val = bool(false)]; bool var_8035_transpose_y_1 = const()[name = string("op_8035_transpose_y_1"), val = bool(true)]; tensor var_8035_cast_fp16 = matmul(transpose_x = var_8035_transpose_x_1, transpose_y = var_8035_transpose_y_1, x = q_89, y = K_expanded_29_cast_fp16)[name = string("op_8035_cast_fp16")]; fp16 var_8036_to_fp16 = const()[name = string("op_8036_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_85_cast_fp16 = mul(x = var_8035_cast_fp16, y = var_8036_to_fp16)[name = string("attn_weights_85_cast_fp16")]; tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = causal_mask)[name = string("attn_weights_87_cast_fp16")]; int32 var_8045 = const()[name = string("op_8045"), val = int32(-1)]; tensor var_8047_cast_fp16 = softmax(axis = var_8045, x = attn_weights_87_cast_fp16)[name = string("op_8047_cast_fp16")]; bool var_8063_transpose_x_0 = const()[name = string("op_8063_transpose_x_0"), val = bool(false)]; bool var_8063_transpose_y_0 = const()[name = string("op_8063_transpose_y_0"), val = bool(false)]; tensor var_8063_cast_fp16 = matmul(transpose_x = var_8063_transpose_x_0, transpose_y = var_8063_transpose_y_0, x = var_8047_cast_fp16, y = V_expanded_29_cast_fp16)[name = string("op_8063_cast_fp16")]; tensor var_8073 = const()[name = string("op_8073"), val = tensor([0, 2, 1, 3])]; tensor var_8080 = const()[name = string("op_8080"), val = tensor([1, 1, -1])]; tensor var_8074 = transpose(perm = var_8073, x = var_8063_cast_fp16)[name = string("transpose_33")]; tensor attn_output_87 = reshape(shape = var_8080, x = var_8074)[name = string("attn_output_87")]; tensor var_8085 = const()[name = string("op_8085"), val = tensor([0, 2, 1])]; tensor squeeze_14 = const()[name = string("squeeze_14"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539332288)))]; string var_8101_pad_type_0 = const()[name = string("op_8101_pad_type_0"), val = string("valid")]; int32 var_8101_groups_0 = const()[name = string("op_8101_groups_0"), val = int32(1)]; tensor var_8101_strides_0 = const()[name = string("op_8101_strides_0"), val = tensor([1])]; tensor var_8101_pad_0 = const()[name = string("op_8101_pad_0"), val = tensor([0, 0])]; tensor var_8101_dilations_0 = const()[name = string("op_8101_dilations_0"), val = tensor([1])]; tensor var_8086 = transpose(perm = var_8085, x = attn_output_87)[name = string("transpose_32")]; tensor var_8101 = conv(dilations = var_8101_dilations_0, groups = var_8101_groups_0, pad = var_8101_pad_0, pad_type = var_8101_pad_type_0, strides = var_8101_strides_0, weight = squeeze_14, x = var_8086)[name = string("op_8101")]; tensor var_8105 = const()[name = string("op_8105"), val = tensor([0, 2, 1])]; int32 var_8112 = const()[name = string("op_8112"), val = int32(-1)]; fp16 const_204_promoted_to_fp16 = const()[name = string("const_204_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_233 = transpose(perm = var_8105, x = var_8101)[name = string("transpose_31")]; tensor var_8118_cast_fp16 = mul(x = x_233, y = const_204_promoted_to_fp16)[name = string("op_8118_cast_fp16")]; bool input_291_interleave_0 = const()[name = string("input_291_interleave_0"), val = bool(false)]; tensor input_291_cast_fp16 = concat(axis = var_8112, interleave = input_291_interleave_0, values = (x_233, var_8118_cast_fp16))[name = string("input_291_cast_fp16")]; tensor normed_407_axes_0 = const()[name = string("normed_407_axes_0"), val = tensor([-1])]; fp16 var_8110_to_fp16 = const()[name = string("op_8110_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_407_cast_fp16 = layer_norm(axes = normed_407_axes_0, epsilon = var_8110_to_fp16, x = input_291_cast_fp16)[name = string("normed_407_cast_fp16")]; tensor var_8123_split_sizes_0 = const()[name = string("op_8123_split_sizes_0"), val = tensor([640, 640])]; int32 var_8123_axis_0 = const()[name = string("op_8123_axis_0"), val = int32(-1)]; tensor var_8123_cast_fp16_0, tensor var_8123_cast_fp16_1 = split(axis = var_8123_axis_0, split_sizes = var_8123_split_sizes_0, x = normed_407_cast_fp16)[name = string("op_8123_cast_fp16")]; tensor var_8127_to_fp16 = const()[name = string("op_8127_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540643072)))]; tensor out_175_cast_fp16 = mul(x = var_8123_cast_fp16_0, y = var_8127_to_fp16)[name = string("out_175_cast_fp16")]; tensor x_235_cast_fp16 = add(x = x_225_cast_fp16, y = out_175_cast_fp16)[name = string("x_235_cast_fp16")]; int32 var_8141 = const()[name = string("op_8141"), val = int32(-1)]; fp16 const_206_promoted_to_fp16 = const()[name = string("const_206_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8147_cast_fp16 = mul(x = x_235_cast_fp16, y = const_206_promoted_to_fp16)[name = string("op_8147_cast_fp16")]; bool input_293_interleave_0 = const()[name = string("input_293_interleave_0"), val = bool(false)]; tensor input_293_cast_fp16 = concat(axis = var_8141, interleave = input_293_interleave_0, values = (x_235_cast_fp16, var_8147_cast_fp16))[name = string("input_293_cast_fp16")]; tensor normed_411_axes_0 = const()[name = string("normed_411_axes_0"), val = tensor([-1])]; fp16 var_8139_to_fp16 = const()[name = string("op_8139_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_411_cast_fp16 = layer_norm(axes = normed_411_axes_0, epsilon = var_8139_to_fp16, x = input_293_cast_fp16)[name = string("normed_411_cast_fp16")]; tensor var_8152_split_sizes_0 = const()[name = string("op_8152_split_sizes_0"), val = tensor([640, 640])]; int32 var_8152_axis_0 = const()[name = string("op_8152_axis_0"), val = int32(-1)]; tensor var_8152_cast_fp16_0, tensor var_8152_cast_fp16_1 = split(axis = var_8152_axis_0, split_sizes = var_8152_split_sizes_0, x = normed_411_cast_fp16)[name = string("op_8152_cast_fp16")]; tensor var_8156_to_fp16 = const()[name = string("op_8156_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540644416)))]; tensor out_177_cast_fp16 = mul(x = var_8152_cast_fp16_0, y = var_8156_to_fp16)[name = string("out_177_cast_fp16")]; tensor var_8170 = const()[name = string("op_8170"), val = tensor([0, 2, 1])]; tensor input_295_axes_0 = const()[name = string("input_295_axes_0"), val = tensor([2])]; tensor var_8171 = transpose(perm = var_8170, x = out_177_cast_fp16)[name = string("transpose_30")]; tensor input_295 = expand_dims(axes = input_295_axes_0, x = var_8171)[name = string("input_295")]; string gate_57_pad_type_0 = const()[name = string("gate_57_pad_type_0"), val = string("valid")]; tensor gate_57_strides_0 = const()[name = string("gate_57_strides_0"), val = tensor([1, 1])]; tensor gate_57_pad_0 = const()[name = string("gate_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_57_dilations_0 = const()[name = string("gate_57_dilations_0"), val = tensor([1, 1])]; int32 gate_57_groups_0 = const()[name = string("gate_57_groups_0"), val = int32(1)]; tensor gate_57 = conv(dilations = gate_57_dilations_0, groups = gate_57_groups_0, pad = gate_57_pad_0, pad_type = gate_57_pad_type_0, strides = gate_57_strides_0, weight = layers_14_mlp_gate_proj_weight, x = input_295)[name = string("gate_57")]; string up_29_pad_type_0 = const()[name = string("up_29_pad_type_0"), val = string("valid")]; tensor up_29_strides_0 = const()[name = string("up_29_strides_0"), val = tensor([1, 1])]; tensor up_29_pad_0 = const()[name = string("up_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_29_dilations_0 = const()[name = string("up_29_dilations_0"), val = tensor([1, 1])]; int32 up_29_groups_0 = const()[name = string("up_29_groups_0"), val = int32(1)]; tensor up_29 = conv(dilations = up_29_dilations_0, groups = up_29_groups_0, pad = up_29_pad_0, pad_type = up_29_pad_type_0, strides = up_29_strides_0, weight = layers_14_mlp_up_proj_weight, x = input_295)[name = string("up_29")]; string gate_59_mode_0 = const()[name = string("gate_59_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_59 = gelu(mode = gate_59_mode_0, x = gate_57)[name = string("gate_59")]; tensor input_297 = mul(x = gate_59, y = up_29)[name = string("input_297")]; string mlp_out_57_pad_type_0 = const()[name = string("mlp_out_57_pad_type_0"), val = string("valid")]; tensor mlp_out_57_strides_0 = const()[name = string("mlp_out_57_strides_0"), val = tensor([1, 1])]; tensor mlp_out_57_pad_0 = const()[name = string("mlp_out_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_57_dilations_0 = const()[name = string("mlp_out_57_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_57_groups_0 = const()[name = string("mlp_out_57_groups_0"), val = int32(1)]; tensor mlp_out_57 = conv(dilations = mlp_out_57_dilations_0, groups = mlp_out_57_groups_0, pad = mlp_out_57_pad_0, pad_type = mlp_out_57_pad_type_0, strides = mlp_out_57_strides_0, weight = layers_14_mlp_down_proj_weight, x = input_297)[name = string("mlp_out_57")]; tensor var_8211_axes_0 = const()[name = string("op_8211_axes_0"), val = tensor([2])]; tensor var_8211 = squeeze(axes = var_8211_axes_0, x = mlp_out_57)[name = string("op_8211")]; tensor var_8215 = const()[name = string("op_8215"), val = tensor([0, 2, 1])]; int32 var_8222 = const()[name = string("op_8222"), val = int32(-1)]; fp16 const_208_promoted_to_fp16 = const()[name = string("const_208_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_239 = transpose(perm = var_8215, x = var_8211)[name = string("transpose_29")]; tensor var_8228_cast_fp16 = mul(x = x_239, y = const_208_promoted_to_fp16)[name = string("op_8228_cast_fp16")]; bool input_299_interleave_0 = const()[name = string("input_299_interleave_0"), val = bool(false)]; tensor input_299_cast_fp16 = concat(axis = var_8222, interleave = input_299_interleave_0, values = (x_239, var_8228_cast_fp16))[name = string("input_299_cast_fp16")]; tensor normed_417_axes_0 = const()[name = string("normed_417_axes_0"), val = tensor([-1])]; fp16 var_8220_to_fp16 = const()[name = string("op_8220_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_417_cast_fp16 = layer_norm(axes = normed_417_axes_0, epsilon = var_8220_to_fp16, x = input_299_cast_fp16)[name = string("normed_417_cast_fp16")]; tensor var_8233_split_sizes_0 = const()[name = string("op_8233_split_sizes_0"), val = tensor([640, 640])]; int32 var_8233_axis_0 = const()[name = string("op_8233_axis_0"), val = int32(-1)]; tensor var_8233_cast_fp16_0, tensor var_8233_cast_fp16_1 = split(axis = var_8233_axis_0, split_sizes = var_8233_split_sizes_0, x = normed_417_cast_fp16)[name = string("op_8233_cast_fp16")]; tensor var_8237_to_fp16 = const()[name = string("op_8237_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540645760)))]; tensor out_179_cast_fp16 = mul(x = var_8233_cast_fp16_0, y = var_8237_to_fp16)[name = string("out_179_cast_fp16")]; tensor x_241_cast_fp16 = add(x = x_235_cast_fp16, y = out_179_cast_fp16)[name = string("x_241_cast_fp16")]; int32 var_8251 = const()[name = string("op_8251"), val = int32(-1)]; fp16 const_210_promoted_to_fp16 = const()[name = string("const_210_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8257_cast_fp16 = mul(x = x_241_cast_fp16, y = const_210_promoted_to_fp16)[name = string("op_8257_cast_fp16")]; bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; tensor input_301_cast_fp16 = concat(axis = var_8251, interleave = input_301_interleave_0, values = (x_241_cast_fp16, var_8257_cast_fp16))[name = string("input_301_cast_fp16")]; tensor normed_421_axes_0 = const()[name = string("normed_421_axes_0"), val = tensor([-1])]; fp16 var_8249_to_fp16 = const()[name = string("op_8249_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_421_cast_fp16 = layer_norm(axes = normed_421_axes_0, epsilon = var_8249_to_fp16, x = input_301_cast_fp16)[name = string("normed_421_cast_fp16")]; tensor var_8262_split_sizes_0 = const()[name = string("op_8262_split_sizes_0"), val = tensor([640, 640])]; int32 var_8262_axis_0 = const()[name = string("op_8262_axis_0"), val = int32(-1)]; tensor var_8262_cast_fp16_0, tensor var_8262_cast_fp16_1 = split(axis = var_8262_axis_0, split_sizes = var_8262_split_sizes_0, x = normed_421_cast_fp16)[name = string("op_8262_cast_fp16")]; tensor var_8266_to_fp16 = const()[name = string("op_8266_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540647104)))]; tensor out_181_cast_fp16 = mul(x = var_8262_cast_fp16_0, y = var_8266_to_fp16)[name = string("out_181_cast_fp16")]; tensor var_8280 = const()[name = string("op_8280"), val = tensor([0, 2, 1])]; tensor input_303_axes_0 = const()[name = string("input_303_axes_0"), val = tensor([2])]; tensor var_8281 = transpose(perm = var_8280, x = out_181_cast_fp16)[name = string("transpose_28")]; tensor input_303 = expand_dims(axes = input_303_axes_0, x = var_8281)[name = string("input_303")]; string var_8294_pad_type_0 = const()[name = string("op_8294_pad_type_0"), val = string("valid")]; tensor var_8294_strides_0 = const()[name = string("op_8294_strides_0"), val = tensor([1, 1])]; tensor var_8294_pad_0 = const()[name = string("op_8294_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8294_dilations_0 = const()[name = string("op_8294_dilations_0"), val = tensor([1, 1])]; int32 var_8294_groups_0 = const()[name = string("op_8294_groups_0"), val = int32(1)]; tensor var_8294 = conv(dilations = var_8294_dilations_0, groups = var_8294_groups_0, pad = var_8294_pad_0, pad_type = var_8294_pad_type_0, strides = var_8294_strides_0, weight = layers_15_self_attn_q_proj_weight, x = input_303)[name = string("op_8294")]; tensor var_8299 = const()[name = string("op_8299"), val = tensor([1, 4, 256, 1])]; tensor var_8300 = reshape(shape = var_8299, x = var_8294)[name = string("op_8300")]; tensor var_8305 = const()[name = string("op_8305"), val = tensor([0, 1, 3, 2])]; tensor var_8310 = const()[name = string("op_8310"), val = tensor([1, 4, 256])]; tensor q_91 = transpose(perm = var_8305, x = var_8300)[name = string("transpose_27")]; tensor x_245 = reshape(shape = var_8310, x = q_91)[name = string("x_245")]; int32 var_8317 = const()[name = string("op_8317"), val = int32(-1)]; fp16 const_212_promoted_to_fp16 = const()[name = string("const_212_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8323_cast_fp16 = mul(x = x_245, y = const_212_promoted_to_fp16)[name = string("op_8323_cast_fp16")]; bool input_305_interleave_0 = const()[name = string("input_305_interleave_0"), val = bool(false)]; tensor input_305_cast_fp16 = concat(axis = var_8317, interleave = input_305_interleave_0, values = (x_245, var_8323_cast_fp16))[name = string("input_305_cast_fp16")]; tensor normed_427_axes_0 = const()[name = string("normed_427_axes_0"), val = tensor([-1])]; fp16 var_8315_to_fp16 = const()[name = string("op_8315_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_427_cast_fp16 = layer_norm(axes = normed_427_axes_0, epsilon = var_8315_to_fp16, x = input_305_cast_fp16)[name = string("normed_427_cast_fp16")]; tensor var_8328_split_sizes_0 = const()[name = string("op_8328_split_sizes_0"), val = tensor([256, 256])]; int32 var_8328_axis_0 = const()[name = string("op_8328_axis_0"), val = int32(-1)]; tensor var_8328_cast_fp16_0, tensor var_8328_cast_fp16_1 = split(axis = var_8328_axis_0, split_sizes = var_8328_split_sizes_0, x = normed_427_cast_fp16)[name = string("op_8328_cast_fp16")]; tensor var_8332_to_fp16 = const()[name = string("op_8332_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540648448)))]; tensor out_183_cast_fp16 = mul(x = var_8328_cast_fp16_0, y = var_8332_to_fp16)[name = string("out_183_cast_fp16")]; tensor var_8339 = const()[name = string("op_8339"), val = tensor([1, 4, 1, 256])]; tensor q_93 = reshape(shape = var_8339, x = out_183_cast_fp16)[name = string("q_93")]; string var_8351_pad_type_0 = const()[name = string("op_8351_pad_type_0"), val = string("valid")]; tensor var_8351_strides_0 = const()[name = string("op_8351_strides_0"), val = tensor([1, 1])]; tensor var_8351_pad_0 = const()[name = string("op_8351_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8351_dilations_0 = const()[name = string("op_8351_dilations_0"), val = tensor([1, 1])]; int32 var_8351_groups_0 = const()[name = string("op_8351_groups_0"), val = int32(1)]; tensor var_8351 = conv(dilations = var_8351_dilations_0, groups = var_8351_groups_0, pad = var_8351_pad_0, pad_type = var_8351_pad_type_0, strides = var_8351_strides_0, weight = layers_15_self_attn_k_proj_weight, x = input_303)[name = string("op_8351")]; tensor var_8356 = const()[name = string("op_8356"), val = tensor([1, 1, 256, 1])]; tensor var_8357 = reshape(shape = var_8356, x = var_8351)[name = string("op_8357")]; tensor var_8362 = const()[name = string("op_8362"), val = tensor([0, 1, 3, 2])]; tensor var_8367 = const()[name = string("op_8367"), val = tensor([1, 1, 256])]; tensor k_91 = transpose(perm = var_8362, x = var_8357)[name = string("transpose_26")]; tensor x_247 = reshape(shape = var_8367, x = k_91)[name = string("x_247")]; int32 var_8374 = const()[name = string("op_8374"), val = int32(-1)]; fp16 const_214_promoted_to_fp16 = const()[name = string("const_214_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8380_cast_fp16 = mul(x = x_247, y = const_214_promoted_to_fp16)[name = string("op_8380_cast_fp16")]; bool input_307_interleave_0 = const()[name = string("input_307_interleave_0"), val = bool(false)]; tensor input_307_cast_fp16 = concat(axis = var_8374, interleave = input_307_interleave_0, values = (x_247, var_8380_cast_fp16))[name = string("input_307_cast_fp16")]; tensor normed_431_axes_0 = const()[name = string("normed_431_axes_0"), val = tensor([-1])]; fp16 var_8372_to_fp16 = const()[name = string("op_8372_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_431_cast_fp16 = layer_norm(axes = normed_431_axes_0, epsilon = var_8372_to_fp16, x = input_307_cast_fp16)[name = string("normed_431_cast_fp16")]; tensor var_8385_split_sizes_0 = const()[name = string("op_8385_split_sizes_0"), val = tensor([256, 256])]; int32 var_8385_axis_0 = const()[name = string("op_8385_axis_0"), val = int32(-1)]; tensor var_8385_cast_fp16_0, tensor var_8385_cast_fp16_1 = split(axis = var_8385_axis_0, split_sizes = var_8385_split_sizes_0, x = normed_431_cast_fp16)[name = string("op_8385_cast_fp16")]; tensor var_8389_to_fp16 = const()[name = string("op_8389_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540649024)))]; tensor out_185_cast_fp16 = mul(x = var_8385_cast_fp16_0, y = var_8389_to_fp16)[name = string("out_185_cast_fp16")]; tensor var_8396 = const()[name = string("op_8396"), val = tensor([1, 1, 1, 256])]; tensor k_93 = reshape(shape = var_8396, x = out_185_cast_fp16)[name = string("k_93")]; string var_8408_pad_type_0 = const()[name = string("op_8408_pad_type_0"), val = string("valid")]; tensor var_8408_strides_0 = const()[name = string("op_8408_strides_0"), val = tensor([1, 1])]; tensor var_8408_pad_0 = const()[name = string("op_8408_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8408_dilations_0 = const()[name = string("op_8408_dilations_0"), val = tensor([1, 1])]; int32 var_8408_groups_0 = const()[name = string("op_8408_groups_0"), val = int32(1)]; tensor var_8408 = conv(dilations = var_8408_dilations_0, groups = var_8408_groups_0, pad = var_8408_pad_0, pad_type = var_8408_pad_type_0, strides = var_8408_strides_0, weight = layers_15_self_attn_v_proj_weight, x = input_303)[name = string("op_8408")]; tensor var_8413 = const()[name = string("op_8413"), val = tensor([1, 1, 256, 1])]; tensor var_8414 = reshape(shape = var_8413, x = var_8408)[name = string("op_8414")]; tensor var_8419 = const()[name = string("op_8419"), val = tensor([0, 1, 3, 2])]; tensor var_8421 = mul(x = q_93, y = cos_1)[name = string("op_8421")]; tensor var_8422_split_sizes_0 = const()[name = string("op_8422_split_sizes_0"), val = tensor([128, 128])]; int32 var_8422_axis_0 = const()[name = string("op_8422_axis_0"), val = int32(-1)]; tensor var_8422_0, tensor var_8422_1 = split(axis = var_8422_axis_0, split_sizes = var_8422_split_sizes_0, x = q_93)[name = string("op_8422")]; fp16 const_216_promoted = const()[name = string("const_216_promoted"), val = fp16(-0x1p+0)]; tensor var_8424 = mul(x = var_8422_1, y = const_216_promoted)[name = string("op_8424")]; int32 var_8426 = const()[name = string("op_8426"), val = int32(-1)]; bool var_8427_interleave_0 = const()[name = string("op_8427_interleave_0"), val = bool(false)]; tensor var_8427 = concat(axis = var_8426, interleave = var_8427_interleave_0, values = (var_8424, var_8422_0))[name = string("op_8427")]; tensor var_8428 = mul(x = var_8427, y = sin_1)[name = string("op_8428")]; tensor q_95 = add(x = var_8421, y = var_8428)[name = string("q_95")]; tensor var_8431 = mul(x = k_93, y = cos_1)[name = string("op_8431")]; tensor var_8432_split_sizes_0 = const()[name = string("op_8432_split_sizes_0"), val = tensor([128, 128])]; int32 var_8432_axis_0 = const()[name = string("op_8432_axis_0"), val = int32(-1)]; tensor var_8432_0, tensor var_8432_1 = split(axis = var_8432_axis_0, split_sizes = var_8432_split_sizes_0, x = k_93)[name = string("op_8432")]; fp16 const_217_promoted = const()[name = string("const_217_promoted"), val = fp16(-0x1p+0)]; tensor var_8434 = mul(x = var_8432_1, y = const_217_promoted)[name = string("op_8434")]; int32 var_8436 = const()[name = string("op_8436"), val = int32(-1)]; bool var_8437_interleave_0 = const()[name = string("op_8437_interleave_0"), val = bool(false)]; tensor var_8437 = concat(axis = var_8436, interleave = var_8437_interleave_0, values = (var_8434, var_8432_0))[name = string("op_8437")]; tensor var_8438 = mul(x = var_8437, y = sin_1)[name = string("op_8438")]; tensor k_95 = add(x = var_8431, y = var_8438)[name = string("k_95")]; tensor var_8443_begin_0 = const()[name = string("op_8443_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_8443_end_0 = const()[name = string("op_8443_end_0"), val = tensor([16, 1, 2048, 256])]; tensor var_8443_end_mask_0 = const()[name = string("op_8443_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8443_squeeze_mask_0 = const()[name = string("op_8443_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_8443_cast_fp16 = slice_by_index(begin = var_8443_begin_0, end = var_8443_end_0, end_mask = var_8443_end_mask_0, squeeze_mask = var_8443_squeeze_mask_0, x = coreml_update_state_65)[name = string("op_8443_cast_fp16")]; tensor K_cache_31_axes_0 = const()[name = string("K_cache_31_axes_0"), val = tensor([0])]; tensor K_cache_31_cast_fp16 = expand_dims(axes = K_cache_31_axes_0, x = var_8443_cast_fp16)[name = string("K_cache_31_cast_fp16")]; tensor var_8448_begin_0 = const()[name = string("op_8448_begin_0"), val = tensor([33, 0, 0, 0])]; tensor var_8448_end_0 = const()[name = string("op_8448_end_0"), val = tensor([34, 1, 2048, 256])]; tensor var_8448_end_mask_0 = const()[name = string("op_8448_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8448_squeeze_mask_0 = const()[name = string("op_8448_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_8448_cast_fp16 = slice_by_index(begin = var_8448_begin_0, end = var_8448_end_0, end_mask = var_8448_end_mask_0, squeeze_mask = var_8448_squeeze_mask_0, x = coreml_update_state_65)[name = string("op_8448_cast_fp16")]; tensor V_cache_31_axes_0 = const()[name = string("V_cache_31_axes_0"), val = tensor([0])]; tensor V_cache_31_cast_fp16 = expand_dims(axes = V_cache_31_axes_0, x = var_8448_cast_fp16)[name = string("V_cache_31_cast_fp16")]; tensor k_broadcast_31_reps_0 = const()[name = string("k_broadcast_31_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_31 = tile(reps = k_broadcast_31_reps_0, x = k_95)[name = string("k_broadcast_31")]; tensor v_broadcast_31_reps_0 = const()[name = string("v_broadcast_31_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_31 = transpose(perm = var_8419, x = var_8414)[name = string("transpose_25")]; tensor v_broadcast_31 = tile(reps = v_broadcast_31_reps_0, x = v_31)[name = string("v_broadcast_31")]; tensor var_8456_cast_fp16 = mul(x = K_cache_31_cast_fp16, y = var_1120_cast_fp16)[name = string("op_8456_cast_fp16")]; tensor var_8457_cast_fp16 = mul(x = k_broadcast_31, y = update_mask)[name = string("op_8457_cast_fp16")]; tensor K_new_31_cast_fp16 = add(x = var_8456_cast_fp16, y = var_8457_cast_fp16)[name = string("K_new_31_cast_fp16")]; tensor var_8463_cast_fp16 = mul(x = V_cache_31_cast_fp16, y = var_1120_cast_fp16)[name = string("op_8463_cast_fp16")]; tensor var_8464_cast_fp16 = mul(x = v_broadcast_31, y = update_mask)[name = string("op_8464_cast_fp16")]; tensor V_new_31_cast_fp16 = add(x = var_8463_cast_fp16, y = var_8464_cast_fp16)[name = string("V_new_31_cast_fp16")]; tensor var_8468_axes_0 = const()[name = string("op_8468_axes_0"), val = tensor([0])]; tensor var_8468_cast_fp16 = squeeze(axes = var_8468_axes_0, x = K_new_31_cast_fp16)[name = string("op_8468_cast_fp16")]; tensor concat_60 = const()[name = string("concat_60"), val = tensor([15, 0, 0, 0])]; tensor concat_61 = const()[name = string("concat_61"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_31_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_31_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_31_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_60, begin_mask = kv_cache_0_internal_tensor_assign_31_begin_mask_0, end = concat_61, end_mask = kv_cache_0_internal_tensor_assign_31_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_31_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_31_stride_0, update = var_8468_cast_fp16, x = coreml_update_state_65)[name = string("kv_cache_0_internal_tensor_assign_31_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_31_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_66_write_state")]; tensor coreml_update_state_66 = read_state(input = kv_cache_0)[name = string("coreml_update_state_66")]; tensor var_8475_axes_0 = const()[name = string("op_8475_axes_0"), val = tensor([0])]; tensor var_8475_cast_fp16 = squeeze(axes = var_8475_axes_0, x = V_new_31_cast_fp16)[name = string("op_8475_cast_fp16")]; tensor concat_62 = const()[name = string("concat_62"), val = tensor([33, 0, 0, 0])]; tensor concat_63 = const()[name = string("concat_63"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_32_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_32_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_32_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_62, begin_mask = kv_cache_0_internal_tensor_assign_32_begin_mask_0, end = concat_63, end_mask = kv_cache_0_internal_tensor_assign_32_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_32_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_32_stride_0, update = var_8475_cast_fp16, x = coreml_update_state_66)[name = string("kv_cache_0_internal_tensor_assign_32_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_32_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_67_write_state")]; tensor coreml_update_state_67 = read_state(input = kv_cache_0)[name = string("coreml_update_state_67")]; tensor hidden_states_123_axes_0 = const()[name = string("hidden_states_123_axes_0"), val = tensor([2])]; tensor hidden_states_123_cast_fp16 = expand_dims(axes = hidden_states_123_axes_0, x = K_new_31_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; tensor var_8488 = const()[name = string("op_8488"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_125_cast_fp16 = tile(reps = var_8488, x = hidden_states_123_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; tensor var_8494 = const()[name = string("op_8494"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_31_cast_fp16 = reshape(shape = var_8494, x = hidden_states_125_cast_fp16)[name = string("K_expanded_31_cast_fp16")]; tensor hidden_states_127_axes_0 = const()[name = string("hidden_states_127_axes_0"), val = tensor([2])]; tensor hidden_states_127_cast_fp16 = expand_dims(axes = hidden_states_127_axes_0, x = V_new_31_cast_fp16)[name = string("hidden_states_127_cast_fp16")]; tensor var_8503 = const()[name = string("op_8503"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_129_cast_fp16 = tile(reps = var_8503, x = hidden_states_127_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; tensor var_8509 = const()[name = string("op_8509"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_31_cast_fp16 = reshape(shape = var_8509, x = hidden_states_129_cast_fp16)[name = string("V_expanded_31_cast_fp16")]; bool var_8524_transpose_x_1 = const()[name = string("op_8524_transpose_x_1"), val = bool(false)]; bool var_8524_transpose_y_1 = const()[name = string("op_8524_transpose_y_1"), val = bool(true)]; tensor var_8524_cast_fp16 = matmul(transpose_x = var_8524_transpose_x_1, transpose_y = var_8524_transpose_y_1, x = q_95, y = K_expanded_31_cast_fp16)[name = string("op_8524_cast_fp16")]; fp16 var_8525_to_fp16 = const()[name = string("op_8525_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_91_cast_fp16 = mul(x = var_8524_cast_fp16, y = var_8525_to_fp16)[name = string("attn_weights_91_cast_fp16")]; tensor attn_weights_93_cast_fp16 = add(x = attn_weights_91_cast_fp16, y = causal_mask)[name = string("attn_weights_93_cast_fp16")]; int32 var_8534 = const()[name = string("op_8534"), val = int32(-1)]; tensor var_8536_cast_fp16 = softmax(axis = var_8534, x = attn_weights_93_cast_fp16)[name = string("op_8536_cast_fp16")]; bool var_8552_transpose_x_0 = const()[name = string("op_8552_transpose_x_0"), val = bool(false)]; bool var_8552_transpose_y_0 = const()[name = string("op_8552_transpose_y_0"), val = bool(false)]; tensor var_8552_cast_fp16 = matmul(transpose_x = var_8552_transpose_x_0, transpose_y = var_8552_transpose_y_0, x = var_8536_cast_fp16, y = V_expanded_31_cast_fp16)[name = string("op_8552_cast_fp16")]; tensor var_8562 = const()[name = string("op_8562"), val = tensor([0, 2, 1, 3])]; tensor var_8569 = const()[name = string("op_8569"), val = tensor([1, 1, -1])]; tensor var_8563 = transpose(perm = var_8562, x = var_8552_cast_fp16)[name = string("transpose_24")]; tensor attn_output_93 = reshape(shape = var_8569, x = var_8563)[name = string("attn_output_93")]; tensor var_8574 = const()[name = string("op_8574"), val = tensor([0, 2, 1])]; tensor squeeze_15 = const()[name = string("squeeze_15"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540649600)))]; string var_8590_pad_type_0 = const()[name = string("op_8590_pad_type_0"), val = string("valid")]; int32 var_8590_groups_0 = const()[name = string("op_8590_groups_0"), val = int32(1)]; tensor var_8590_strides_0 = const()[name = string("op_8590_strides_0"), val = tensor([1])]; tensor var_8590_pad_0 = const()[name = string("op_8590_pad_0"), val = tensor([0, 0])]; tensor var_8590_dilations_0 = const()[name = string("op_8590_dilations_0"), val = tensor([1])]; tensor var_8575 = transpose(perm = var_8574, x = attn_output_93)[name = string("transpose_23")]; tensor var_8590 = conv(dilations = var_8590_dilations_0, groups = var_8590_groups_0, pad = var_8590_pad_0, pad_type = var_8590_pad_type_0, strides = var_8590_strides_0, weight = squeeze_15, x = var_8575)[name = string("op_8590")]; tensor var_8594 = const()[name = string("op_8594"), val = tensor([0, 2, 1])]; int32 var_8601 = const()[name = string("op_8601"), val = int32(-1)]; fp16 const_218_promoted_to_fp16 = const()[name = string("const_218_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_249 = transpose(perm = var_8594, x = var_8590)[name = string("transpose_22")]; tensor var_8607_cast_fp16 = mul(x = x_249, y = const_218_promoted_to_fp16)[name = string("op_8607_cast_fp16")]; bool input_311_interleave_0 = const()[name = string("input_311_interleave_0"), val = bool(false)]; tensor input_311_cast_fp16 = concat(axis = var_8601, interleave = input_311_interleave_0, values = (x_249, var_8607_cast_fp16))[name = string("input_311_cast_fp16")]; tensor normed_435_axes_0 = const()[name = string("normed_435_axes_0"), val = tensor([-1])]; fp16 var_8599_to_fp16 = const()[name = string("op_8599_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_435_cast_fp16 = layer_norm(axes = normed_435_axes_0, epsilon = var_8599_to_fp16, x = input_311_cast_fp16)[name = string("normed_435_cast_fp16")]; tensor var_8612_split_sizes_0 = const()[name = string("op_8612_split_sizes_0"), val = tensor([640, 640])]; int32 var_8612_axis_0 = const()[name = string("op_8612_axis_0"), val = int32(-1)]; tensor var_8612_cast_fp16_0, tensor var_8612_cast_fp16_1 = split(axis = var_8612_axis_0, split_sizes = var_8612_split_sizes_0, x = normed_435_cast_fp16)[name = string("op_8612_cast_fp16")]; tensor var_8616_to_fp16 = const()[name = string("op_8616_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(541960384)))]; tensor out_187_cast_fp16 = mul(x = var_8612_cast_fp16_0, y = var_8616_to_fp16)[name = string("out_187_cast_fp16")]; tensor x_251_cast_fp16 = add(x = x_241_cast_fp16, y = out_187_cast_fp16)[name = string("x_251_cast_fp16")]; int32 var_8630 = const()[name = string("op_8630"), val = int32(-1)]; fp16 const_220_promoted_to_fp16 = const()[name = string("const_220_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8636_cast_fp16 = mul(x = x_251_cast_fp16, y = const_220_promoted_to_fp16)[name = string("op_8636_cast_fp16")]; bool input_313_interleave_0 = const()[name = string("input_313_interleave_0"), val = bool(false)]; tensor input_313_cast_fp16 = concat(axis = var_8630, interleave = input_313_interleave_0, values = (x_251_cast_fp16, var_8636_cast_fp16))[name = string("input_313_cast_fp16")]; tensor normed_439_axes_0 = const()[name = string("normed_439_axes_0"), val = tensor([-1])]; fp16 var_8628_to_fp16 = const()[name = string("op_8628_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_439_cast_fp16 = layer_norm(axes = normed_439_axes_0, epsilon = var_8628_to_fp16, x = input_313_cast_fp16)[name = string("normed_439_cast_fp16")]; tensor var_8641_split_sizes_0 = const()[name = string("op_8641_split_sizes_0"), val = tensor([640, 640])]; int32 var_8641_axis_0 = const()[name = string("op_8641_axis_0"), val = int32(-1)]; tensor var_8641_cast_fp16_0, tensor var_8641_cast_fp16_1 = split(axis = var_8641_axis_0, split_sizes = var_8641_split_sizes_0, x = normed_439_cast_fp16)[name = string("op_8641_cast_fp16")]; tensor var_8645_to_fp16 = const()[name = string("op_8645_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(541961728)))]; tensor out_189_cast_fp16 = mul(x = var_8641_cast_fp16_0, y = var_8645_to_fp16)[name = string("out_189_cast_fp16")]; tensor var_8659 = const()[name = string("op_8659"), val = tensor([0, 2, 1])]; tensor input_315_axes_0 = const()[name = string("input_315_axes_0"), val = tensor([2])]; tensor var_8660 = transpose(perm = var_8659, x = out_189_cast_fp16)[name = string("transpose_21")]; tensor input_315 = expand_dims(axes = input_315_axes_0, x = var_8660)[name = string("input_315")]; string gate_61_pad_type_0 = const()[name = string("gate_61_pad_type_0"), val = string("valid")]; tensor gate_61_strides_0 = const()[name = string("gate_61_strides_0"), val = tensor([1, 1])]; tensor gate_61_pad_0 = const()[name = string("gate_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_61_dilations_0 = const()[name = string("gate_61_dilations_0"), val = tensor([1, 1])]; int32 gate_61_groups_0 = const()[name = string("gate_61_groups_0"), val = int32(1)]; tensor gate_61 = conv(dilations = gate_61_dilations_0, groups = gate_61_groups_0, pad = gate_61_pad_0, pad_type = gate_61_pad_type_0, strides = gate_61_strides_0, weight = layers_15_mlp_gate_proj_weight, x = input_315)[name = string("gate_61")]; string up_31_pad_type_0 = const()[name = string("up_31_pad_type_0"), val = string("valid")]; tensor up_31_strides_0 = const()[name = string("up_31_strides_0"), val = tensor([1, 1])]; tensor up_31_pad_0 = const()[name = string("up_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_31_dilations_0 = const()[name = string("up_31_dilations_0"), val = tensor([1, 1])]; int32 up_31_groups_0 = const()[name = string("up_31_groups_0"), val = int32(1)]; tensor up_31 = conv(dilations = up_31_dilations_0, groups = up_31_groups_0, pad = up_31_pad_0, pad_type = up_31_pad_type_0, strides = up_31_strides_0, weight = layers_15_mlp_up_proj_weight, x = input_315)[name = string("up_31")]; string gate_63_mode_0 = const()[name = string("gate_63_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_63 = gelu(mode = gate_63_mode_0, x = gate_61)[name = string("gate_63")]; tensor input_317 = mul(x = gate_63, y = up_31)[name = string("input_317")]; string mlp_out_61_pad_type_0 = const()[name = string("mlp_out_61_pad_type_0"), val = string("valid")]; tensor mlp_out_61_strides_0 = const()[name = string("mlp_out_61_strides_0"), val = tensor([1, 1])]; tensor mlp_out_61_pad_0 = const()[name = string("mlp_out_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_61_dilations_0 = const()[name = string("mlp_out_61_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_61_groups_0 = const()[name = string("mlp_out_61_groups_0"), val = int32(1)]; tensor mlp_out_61 = conv(dilations = mlp_out_61_dilations_0, groups = mlp_out_61_groups_0, pad = mlp_out_61_pad_0, pad_type = mlp_out_61_pad_type_0, strides = mlp_out_61_strides_0, weight = layers_15_mlp_down_proj_weight, x = input_317)[name = string("mlp_out_61")]; tensor var_8700_axes_0 = const()[name = string("op_8700_axes_0"), val = tensor([2])]; tensor var_8700 = squeeze(axes = var_8700_axes_0, x = mlp_out_61)[name = string("op_8700")]; tensor var_8704 = const()[name = string("op_8704"), val = tensor([0, 2, 1])]; int32 var_8711 = const()[name = string("op_8711"), val = int32(-1)]; fp16 const_222_promoted_to_fp16 = const()[name = string("const_222_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_255 = transpose(perm = var_8704, x = var_8700)[name = string("transpose_20")]; tensor var_8717_cast_fp16 = mul(x = x_255, y = const_222_promoted_to_fp16)[name = string("op_8717_cast_fp16")]; bool input_319_interleave_0 = const()[name = string("input_319_interleave_0"), val = bool(false)]; tensor input_319_cast_fp16 = concat(axis = var_8711, interleave = input_319_interleave_0, values = (x_255, var_8717_cast_fp16))[name = string("input_319_cast_fp16")]; tensor normed_445_axes_0 = const()[name = string("normed_445_axes_0"), val = tensor([-1])]; fp16 var_8709_to_fp16 = const()[name = string("op_8709_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_445_cast_fp16 = layer_norm(axes = normed_445_axes_0, epsilon = var_8709_to_fp16, x = input_319_cast_fp16)[name = string("normed_445_cast_fp16")]; tensor var_8722_split_sizes_0 = const()[name = string("op_8722_split_sizes_0"), val = tensor([640, 640])]; int32 var_8722_axis_0 = const()[name = string("op_8722_axis_0"), val = int32(-1)]; tensor var_8722_cast_fp16_0, tensor var_8722_cast_fp16_1 = split(axis = var_8722_axis_0, split_sizes = var_8722_split_sizes_0, x = normed_445_cast_fp16)[name = string("op_8722_cast_fp16")]; tensor var_8726_to_fp16 = const()[name = string("op_8726_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(541963072)))]; tensor out_191_cast_fp16 = mul(x = var_8722_cast_fp16_0, y = var_8726_to_fp16)[name = string("out_191_cast_fp16")]; tensor x_257_cast_fp16 = add(x = x_251_cast_fp16, y = out_191_cast_fp16)[name = string("x_257_cast_fp16")]; int32 var_8740 = const()[name = string("op_8740"), val = int32(-1)]; fp16 const_224_promoted_to_fp16 = const()[name = string("const_224_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8746_cast_fp16 = mul(x = x_257_cast_fp16, y = const_224_promoted_to_fp16)[name = string("op_8746_cast_fp16")]; bool input_321_interleave_0 = const()[name = string("input_321_interleave_0"), val = bool(false)]; tensor input_321_cast_fp16 = concat(axis = var_8740, interleave = input_321_interleave_0, values = (x_257_cast_fp16, var_8746_cast_fp16))[name = string("input_321_cast_fp16")]; tensor normed_449_axes_0 = const()[name = string("normed_449_axes_0"), val = tensor([-1])]; fp16 var_8738_to_fp16 = const()[name = string("op_8738_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_449_cast_fp16 = layer_norm(axes = normed_449_axes_0, epsilon = var_8738_to_fp16, x = input_321_cast_fp16)[name = string("normed_449_cast_fp16")]; tensor var_8751_split_sizes_0 = const()[name = string("op_8751_split_sizes_0"), val = tensor([640, 640])]; int32 var_8751_axis_0 = const()[name = string("op_8751_axis_0"), val = int32(-1)]; tensor var_8751_cast_fp16_0, tensor var_8751_cast_fp16_1 = split(axis = var_8751_axis_0, split_sizes = var_8751_split_sizes_0, x = normed_449_cast_fp16)[name = string("op_8751_cast_fp16")]; tensor var_8755_to_fp16 = const()[name = string("op_8755_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(541964416)))]; tensor out_193_cast_fp16 = mul(x = var_8751_cast_fp16_0, y = var_8755_to_fp16)[name = string("out_193_cast_fp16")]; tensor var_8769 = const()[name = string("op_8769"), val = tensor([0, 2, 1])]; tensor input_323_axes_0 = const()[name = string("input_323_axes_0"), val = tensor([2])]; tensor var_8770 = transpose(perm = var_8769, x = out_193_cast_fp16)[name = string("transpose_19")]; tensor input_323 = expand_dims(axes = input_323_axes_0, x = var_8770)[name = string("input_323")]; string var_8783_pad_type_0 = const()[name = string("op_8783_pad_type_0"), val = string("valid")]; tensor var_8783_strides_0 = const()[name = string("op_8783_strides_0"), val = tensor([1, 1])]; tensor var_8783_pad_0 = const()[name = string("op_8783_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8783_dilations_0 = const()[name = string("op_8783_dilations_0"), val = tensor([1, 1])]; int32 var_8783_groups_0 = const()[name = string("op_8783_groups_0"), val = int32(1)]; tensor var_8783 = conv(dilations = var_8783_dilations_0, groups = var_8783_groups_0, pad = var_8783_pad_0, pad_type = var_8783_pad_type_0, strides = var_8783_strides_0, weight = layers_16_self_attn_q_proj_weight, x = input_323)[name = string("op_8783")]; tensor var_8788 = const()[name = string("op_8788"), val = tensor([1, 4, 256, 1])]; tensor var_8789 = reshape(shape = var_8788, x = var_8783)[name = string("op_8789")]; tensor var_8794 = const()[name = string("op_8794"), val = tensor([0, 1, 3, 2])]; tensor var_8799 = const()[name = string("op_8799"), val = tensor([1, 4, 256])]; tensor q_97 = transpose(perm = var_8794, x = var_8789)[name = string("transpose_18")]; tensor x_261 = reshape(shape = var_8799, x = q_97)[name = string("x_261")]; int32 var_8806 = const()[name = string("op_8806"), val = int32(-1)]; fp16 const_226_promoted_to_fp16 = const()[name = string("const_226_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8812_cast_fp16 = mul(x = x_261, y = const_226_promoted_to_fp16)[name = string("op_8812_cast_fp16")]; bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; tensor input_325_cast_fp16 = concat(axis = var_8806, interleave = input_325_interleave_0, values = (x_261, var_8812_cast_fp16))[name = string("input_325_cast_fp16")]; tensor normed_455_axes_0 = const()[name = string("normed_455_axes_0"), val = tensor([-1])]; fp16 var_8804_to_fp16 = const()[name = string("op_8804_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_455_cast_fp16 = layer_norm(axes = normed_455_axes_0, epsilon = var_8804_to_fp16, x = input_325_cast_fp16)[name = string("normed_455_cast_fp16")]; tensor var_8817_split_sizes_0 = const()[name = string("op_8817_split_sizes_0"), val = tensor([256, 256])]; int32 var_8817_axis_0 = const()[name = string("op_8817_axis_0"), val = int32(-1)]; tensor var_8817_cast_fp16_0, tensor var_8817_cast_fp16_1 = split(axis = var_8817_axis_0, split_sizes = var_8817_split_sizes_0, x = normed_455_cast_fp16)[name = string("op_8817_cast_fp16")]; tensor var_8821_to_fp16 = const()[name = string("op_8821_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(541965760)))]; tensor out_195_cast_fp16 = mul(x = var_8817_cast_fp16_0, y = var_8821_to_fp16)[name = string("out_195_cast_fp16")]; tensor var_8828 = const()[name = string("op_8828"), val = tensor([1, 4, 1, 256])]; tensor q_99 = reshape(shape = var_8828, x = out_195_cast_fp16)[name = string("q_99")]; string var_8840_pad_type_0 = const()[name = string("op_8840_pad_type_0"), val = string("valid")]; tensor var_8840_strides_0 = const()[name = string("op_8840_strides_0"), val = tensor([1, 1])]; tensor var_8840_pad_0 = const()[name = string("op_8840_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8840_dilations_0 = const()[name = string("op_8840_dilations_0"), val = tensor([1, 1])]; int32 var_8840_groups_0 = const()[name = string("op_8840_groups_0"), val = int32(1)]; tensor var_8840 = conv(dilations = var_8840_dilations_0, groups = var_8840_groups_0, pad = var_8840_pad_0, pad_type = var_8840_pad_type_0, strides = var_8840_strides_0, weight = layers_16_self_attn_k_proj_weight, x = input_323)[name = string("op_8840")]; tensor var_8845 = const()[name = string("op_8845"), val = tensor([1, 1, 256, 1])]; tensor var_8846 = reshape(shape = var_8845, x = var_8840)[name = string("op_8846")]; tensor var_8851 = const()[name = string("op_8851"), val = tensor([0, 1, 3, 2])]; tensor var_8856 = const()[name = string("op_8856"), val = tensor([1, 1, 256])]; tensor k_97 = transpose(perm = var_8851, x = var_8846)[name = string("transpose_17")]; tensor x_263 = reshape(shape = var_8856, x = k_97)[name = string("x_263")]; int32 var_8863 = const()[name = string("op_8863"), val = int32(-1)]; fp16 const_228_promoted_to_fp16 = const()[name = string("const_228_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8869_cast_fp16 = mul(x = x_263, y = const_228_promoted_to_fp16)[name = string("op_8869_cast_fp16")]; bool input_327_interleave_0 = const()[name = string("input_327_interleave_0"), val = bool(false)]; tensor input_327_cast_fp16 = concat(axis = var_8863, interleave = input_327_interleave_0, values = (x_263, var_8869_cast_fp16))[name = string("input_327_cast_fp16")]; tensor normed_459_axes_0 = const()[name = string("normed_459_axes_0"), val = tensor([-1])]; fp16 var_8861_to_fp16 = const()[name = string("op_8861_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_459_cast_fp16 = layer_norm(axes = normed_459_axes_0, epsilon = var_8861_to_fp16, x = input_327_cast_fp16)[name = string("normed_459_cast_fp16")]; tensor var_8874_split_sizes_0 = const()[name = string("op_8874_split_sizes_0"), val = tensor([256, 256])]; int32 var_8874_axis_0 = const()[name = string("op_8874_axis_0"), val = int32(-1)]; tensor var_8874_cast_fp16_0, tensor var_8874_cast_fp16_1 = split(axis = var_8874_axis_0, split_sizes = var_8874_split_sizes_0, x = normed_459_cast_fp16)[name = string("op_8874_cast_fp16")]; tensor var_8878_to_fp16 = const()[name = string("op_8878_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(541966336)))]; tensor out_197_cast_fp16 = mul(x = var_8874_cast_fp16_0, y = var_8878_to_fp16)[name = string("out_197_cast_fp16")]; tensor var_8885 = const()[name = string("op_8885"), val = tensor([1, 1, 1, 256])]; tensor k_99 = reshape(shape = var_8885, x = out_197_cast_fp16)[name = string("k_99")]; string var_8897_pad_type_0 = const()[name = string("op_8897_pad_type_0"), val = string("valid")]; tensor var_8897_strides_0 = const()[name = string("op_8897_strides_0"), val = tensor([1, 1])]; tensor var_8897_pad_0 = const()[name = string("op_8897_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8897_dilations_0 = const()[name = string("op_8897_dilations_0"), val = tensor([1, 1])]; int32 var_8897_groups_0 = const()[name = string("op_8897_groups_0"), val = int32(1)]; tensor var_8897 = conv(dilations = var_8897_dilations_0, groups = var_8897_groups_0, pad = var_8897_pad_0, pad_type = var_8897_pad_type_0, strides = var_8897_strides_0, weight = layers_16_self_attn_v_proj_weight, x = input_323)[name = string("op_8897")]; tensor var_8902 = const()[name = string("op_8902"), val = tensor([1, 1, 256, 1])]; tensor var_8903 = reshape(shape = var_8902, x = var_8897)[name = string("op_8903")]; tensor var_8908 = const()[name = string("op_8908"), val = tensor([0, 1, 3, 2])]; tensor var_8910 = mul(x = q_99, y = cos_1)[name = string("op_8910")]; tensor var_8911_split_sizes_0 = const()[name = string("op_8911_split_sizes_0"), val = tensor([128, 128])]; int32 var_8911_axis_0 = const()[name = string("op_8911_axis_0"), val = int32(-1)]; tensor var_8911_0, tensor var_8911_1 = split(axis = var_8911_axis_0, split_sizes = var_8911_split_sizes_0, x = q_99)[name = string("op_8911")]; fp16 const_230_promoted = const()[name = string("const_230_promoted"), val = fp16(-0x1p+0)]; tensor var_8913 = mul(x = var_8911_1, y = const_230_promoted)[name = string("op_8913")]; int32 var_8915 = const()[name = string("op_8915"), val = int32(-1)]; bool var_8916_interleave_0 = const()[name = string("op_8916_interleave_0"), val = bool(false)]; tensor var_8916 = concat(axis = var_8915, interleave = var_8916_interleave_0, values = (var_8913, var_8911_0))[name = string("op_8916")]; tensor var_8917 = mul(x = var_8916, y = sin_1)[name = string("op_8917")]; tensor q_101 = add(x = var_8910, y = var_8917)[name = string("q_101")]; tensor var_8920 = mul(x = k_99, y = cos_1)[name = string("op_8920")]; tensor var_8921_split_sizes_0 = const()[name = string("op_8921_split_sizes_0"), val = tensor([128, 128])]; int32 var_8921_axis_0 = const()[name = string("op_8921_axis_0"), val = int32(-1)]; tensor var_8921_0, tensor var_8921_1 = split(axis = var_8921_axis_0, split_sizes = var_8921_split_sizes_0, x = k_99)[name = string("op_8921")]; fp16 const_231_promoted = const()[name = string("const_231_promoted"), val = fp16(-0x1p+0)]; tensor var_8923 = mul(x = var_8921_1, y = const_231_promoted)[name = string("op_8923")]; int32 var_8925 = const()[name = string("op_8925"), val = int32(-1)]; bool var_8926_interleave_0 = const()[name = string("op_8926_interleave_0"), val = bool(false)]; tensor var_8926 = concat(axis = var_8925, interleave = var_8926_interleave_0, values = (var_8923, var_8921_0))[name = string("op_8926")]; tensor var_8927 = mul(x = var_8926, y = sin_1)[name = string("op_8927")]; tensor k_101 = add(x = var_8920, y = var_8927)[name = string("k_101")]; tensor var_8932_begin_0 = const()[name = string("op_8932_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_8932_end_0 = const()[name = string("op_8932_end_0"), val = tensor([17, 1, 2048, 256])]; tensor var_8932_end_mask_0 = const()[name = string("op_8932_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8932_squeeze_mask_0 = const()[name = string("op_8932_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_8932_cast_fp16 = slice_by_index(begin = var_8932_begin_0, end = var_8932_end_0, end_mask = var_8932_end_mask_0, squeeze_mask = var_8932_squeeze_mask_0, x = coreml_update_state_67)[name = string("op_8932_cast_fp16")]; tensor K_cache_33_axes_0 = const()[name = string("K_cache_33_axes_0"), val = tensor([0])]; tensor K_cache_33_cast_fp16 = expand_dims(axes = K_cache_33_axes_0, x = var_8932_cast_fp16)[name = string("K_cache_33_cast_fp16")]; tensor var_8937_begin_0 = const()[name = string("op_8937_begin_0"), val = tensor([34, 0, 0, 0])]; tensor var_8937_end_0 = const()[name = string("op_8937_end_0"), val = tensor([35, 1, 2048, 256])]; tensor var_8937_end_mask_0 = const()[name = string("op_8937_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8937_squeeze_mask_0 = const()[name = string("op_8937_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_8937_cast_fp16 = slice_by_index(begin = var_8937_begin_0, end = var_8937_end_0, end_mask = var_8937_end_mask_0, squeeze_mask = var_8937_squeeze_mask_0, x = coreml_update_state_67)[name = string("op_8937_cast_fp16")]; tensor V_cache_33_axes_0 = const()[name = string("V_cache_33_axes_0"), val = tensor([0])]; tensor V_cache_33_cast_fp16 = expand_dims(axes = V_cache_33_axes_0, x = var_8937_cast_fp16)[name = string("V_cache_33_cast_fp16")]; tensor k_broadcast_33_reps_0 = const()[name = string("k_broadcast_33_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast_33 = tile(reps = k_broadcast_33_reps_0, x = k_101)[name = string("k_broadcast_33")]; tensor v_broadcast_33_reps_0 = const()[name = string("v_broadcast_33_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v_33 = transpose(perm = var_8908, x = var_8903)[name = string("transpose_16")]; tensor v_broadcast_33 = tile(reps = v_broadcast_33_reps_0, x = v_33)[name = string("v_broadcast_33")]; tensor var_8945_cast_fp16 = mul(x = K_cache_33_cast_fp16, y = var_1120_cast_fp16)[name = string("op_8945_cast_fp16")]; tensor var_8946_cast_fp16 = mul(x = k_broadcast_33, y = update_mask)[name = string("op_8946_cast_fp16")]; tensor K_new_33_cast_fp16 = add(x = var_8945_cast_fp16, y = var_8946_cast_fp16)[name = string("K_new_33_cast_fp16")]; tensor var_8952_cast_fp16 = mul(x = V_cache_33_cast_fp16, y = var_1120_cast_fp16)[name = string("op_8952_cast_fp16")]; tensor var_8953_cast_fp16 = mul(x = v_broadcast_33, y = update_mask)[name = string("op_8953_cast_fp16")]; tensor V_new_33_cast_fp16 = add(x = var_8952_cast_fp16, y = var_8953_cast_fp16)[name = string("V_new_33_cast_fp16")]; tensor var_8957_axes_0 = const()[name = string("op_8957_axes_0"), val = tensor([0])]; tensor var_8957_cast_fp16 = squeeze(axes = var_8957_axes_0, x = K_new_33_cast_fp16)[name = string("op_8957_cast_fp16")]; tensor concat_64 = const()[name = string("concat_64"), val = tensor([16, 0, 0, 0])]; tensor concat_65 = const()[name = string("concat_65"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_33_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_33_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_33_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_64, begin_mask = kv_cache_0_internal_tensor_assign_33_begin_mask_0, end = concat_65, end_mask = kv_cache_0_internal_tensor_assign_33_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_33_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_33_stride_0, update = var_8957_cast_fp16, x = coreml_update_state_67)[name = string("kv_cache_0_internal_tensor_assign_33_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_33_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_68_write_state")]; tensor coreml_update_state_68 = read_state(input = kv_cache_0)[name = string("coreml_update_state_68")]; tensor var_8964_axes_0 = const()[name = string("op_8964_axes_0"), val = tensor([0])]; tensor var_8964_cast_fp16 = squeeze(axes = var_8964_axes_0, x = V_new_33_cast_fp16)[name = string("op_8964_cast_fp16")]; tensor concat_66 = const()[name = string("concat_66"), val = tensor([34, 0, 0, 0])]; tensor concat_67 = const()[name = string("concat_67"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_34_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_34_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_34_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_66, begin_mask = kv_cache_0_internal_tensor_assign_34_begin_mask_0, end = concat_67, end_mask = kv_cache_0_internal_tensor_assign_34_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_34_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_34_stride_0, update = var_8964_cast_fp16, x = coreml_update_state_68)[name = string("kv_cache_0_internal_tensor_assign_34_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_34_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_69_write_state")]; tensor coreml_update_state_69 = read_state(input = kv_cache_0)[name = string("coreml_update_state_69")]; tensor hidden_states_131_axes_0 = const()[name = string("hidden_states_131_axes_0"), val = tensor([2])]; tensor hidden_states_131_cast_fp16 = expand_dims(axes = hidden_states_131_axes_0, x = K_new_33_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; tensor var_8977 = const()[name = string("op_8977"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_133_cast_fp16 = tile(reps = var_8977, x = hidden_states_131_cast_fp16)[name = string("hidden_states_133_cast_fp16")]; tensor var_8983 = const()[name = string("op_8983"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_33_cast_fp16 = reshape(shape = var_8983, x = hidden_states_133_cast_fp16)[name = string("K_expanded_33_cast_fp16")]; tensor hidden_states_135_axes_0 = const()[name = string("hidden_states_135_axes_0"), val = tensor([2])]; tensor hidden_states_135_cast_fp16 = expand_dims(axes = hidden_states_135_axes_0, x = V_new_33_cast_fp16)[name = string("hidden_states_135_cast_fp16")]; tensor var_8992 = const()[name = string("op_8992"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_137_cast_fp16 = tile(reps = var_8992, x = hidden_states_135_cast_fp16)[name = string("hidden_states_137_cast_fp16")]; tensor var_8998 = const()[name = string("op_8998"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_33_cast_fp16 = reshape(shape = var_8998, x = hidden_states_137_cast_fp16)[name = string("V_expanded_33_cast_fp16")]; bool var_9013_transpose_x_1 = const()[name = string("op_9013_transpose_x_1"), val = bool(false)]; bool var_9013_transpose_y_1 = const()[name = string("op_9013_transpose_y_1"), val = bool(true)]; tensor var_9013_cast_fp16 = matmul(transpose_x = var_9013_transpose_x_1, transpose_y = var_9013_transpose_y_1, x = q_101, y = K_expanded_33_cast_fp16)[name = string("op_9013_cast_fp16")]; fp16 var_9014_to_fp16 = const()[name = string("op_9014_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_97_cast_fp16 = mul(x = var_9013_cast_fp16, y = var_9014_to_fp16)[name = string("attn_weights_97_cast_fp16")]; tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = causal_mask)[name = string("attn_weights_99_cast_fp16")]; int32 var_9023 = const()[name = string("op_9023"), val = int32(-1)]; tensor var_9025_cast_fp16 = softmax(axis = var_9023, x = attn_weights_99_cast_fp16)[name = string("op_9025_cast_fp16")]; bool var_9041_transpose_x_0 = const()[name = string("op_9041_transpose_x_0"), val = bool(false)]; bool var_9041_transpose_y_0 = const()[name = string("op_9041_transpose_y_0"), val = bool(false)]; tensor var_9041_cast_fp16 = matmul(transpose_x = var_9041_transpose_x_0, transpose_y = var_9041_transpose_y_0, x = var_9025_cast_fp16, y = V_expanded_33_cast_fp16)[name = string("op_9041_cast_fp16")]; tensor var_9051 = const()[name = string("op_9051"), val = tensor([0, 2, 1, 3])]; tensor var_9058 = const()[name = string("op_9058"), val = tensor([1, 1, -1])]; tensor var_9052 = transpose(perm = var_9051, x = var_9041_cast_fp16)[name = string("transpose_15")]; tensor attn_output_99 = reshape(shape = var_9058, x = var_9052)[name = string("attn_output_99")]; tensor var_9063 = const()[name = string("op_9063"), val = tensor([0, 2, 1])]; tensor squeeze_16 = const()[name = string("squeeze_16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(541966912)))]; string var_9079_pad_type_0 = const()[name = string("op_9079_pad_type_0"), val = string("valid")]; int32 var_9079_groups_0 = const()[name = string("op_9079_groups_0"), val = int32(1)]; tensor var_9079_strides_0 = const()[name = string("op_9079_strides_0"), val = tensor([1])]; tensor var_9079_pad_0 = const()[name = string("op_9079_pad_0"), val = tensor([0, 0])]; tensor var_9079_dilations_0 = const()[name = string("op_9079_dilations_0"), val = tensor([1])]; tensor var_9064 = transpose(perm = var_9063, x = attn_output_99)[name = string("transpose_14")]; tensor var_9079 = conv(dilations = var_9079_dilations_0, groups = var_9079_groups_0, pad = var_9079_pad_0, pad_type = var_9079_pad_type_0, strides = var_9079_strides_0, weight = squeeze_16, x = var_9064)[name = string("op_9079")]; tensor var_9083 = const()[name = string("op_9083"), val = tensor([0, 2, 1])]; int32 var_9090 = const()[name = string("op_9090"), val = int32(-1)]; fp16 const_232_promoted_to_fp16 = const()[name = string("const_232_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_265 = transpose(perm = var_9083, x = var_9079)[name = string("transpose_13")]; tensor var_9096_cast_fp16 = mul(x = x_265, y = const_232_promoted_to_fp16)[name = string("op_9096_cast_fp16")]; bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; tensor input_331_cast_fp16 = concat(axis = var_9090, interleave = input_331_interleave_0, values = (x_265, var_9096_cast_fp16))[name = string("input_331_cast_fp16")]; tensor normed_463_axes_0 = const()[name = string("normed_463_axes_0"), val = tensor([-1])]; fp16 var_9088_to_fp16 = const()[name = string("op_9088_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_463_cast_fp16 = layer_norm(axes = normed_463_axes_0, epsilon = var_9088_to_fp16, x = input_331_cast_fp16)[name = string("normed_463_cast_fp16")]; tensor var_9101_split_sizes_0 = const()[name = string("op_9101_split_sizes_0"), val = tensor([640, 640])]; int32 var_9101_axis_0 = const()[name = string("op_9101_axis_0"), val = int32(-1)]; tensor var_9101_cast_fp16_0, tensor var_9101_cast_fp16_1 = split(axis = var_9101_axis_0, split_sizes = var_9101_split_sizes_0, x = normed_463_cast_fp16)[name = string("op_9101_cast_fp16")]; tensor var_9105_to_fp16 = const()[name = string("op_9105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543277696)))]; tensor out_199_cast_fp16 = mul(x = var_9101_cast_fp16_0, y = var_9105_to_fp16)[name = string("out_199_cast_fp16")]; tensor x_267_cast_fp16 = add(x = x_257_cast_fp16, y = out_199_cast_fp16)[name = string("x_267_cast_fp16")]; int32 var_9119 = const()[name = string("op_9119"), val = int32(-1)]; fp16 const_234_promoted_to_fp16 = const()[name = string("const_234_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9125_cast_fp16 = mul(x = x_267_cast_fp16, y = const_234_promoted_to_fp16)[name = string("op_9125_cast_fp16")]; bool input_333_interleave_0 = const()[name = string("input_333_interleave_0"), val = bool(false)]; tensor input_333_cast_fp16 = concat(axis = var_9119, interleave = input_333_interleave_0, values = (x_267_cast_fp16, var_9125_cast_fp16))[name = string("input_333_cast_fp16")]; tensor normed_467_axes_0 = const()[name = string("normed_467_axes_0"), val = tensor([-1])]; fp16 var_9117_to_fp16 = const()[name = string("op_9117_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_467_cast_fp16 = layer_norm(axes = normed_467_axes_0, epsilon = var_9117_to_fp16, x = input_333_cast_fp16)[name = string("normed_467_cast_fp16")]; tensor var_9130_split_sizes_0 = const()[name = string("op_9130_split_sizes_0"), val = tensor([640, 640])]; int32 var_9130_axis_0 = const()[name = string("op_9130_axis_0"), val = int32(-1)]; tensor var_9130_cast_fp16_0, tensor var_9130_cast_fp16_1 = split(axis = var_9130_axis_0, split_sizes = var_9130_split_sizes_0, x = normed_467_cast_fp16)[name = string("op_9130_cast_fp16")]; tensor var_9134_to_fp16 = const()[name = string("op_9134_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543279040)))]; tensor out_201_cast_fp16 = mul(x = var_9130_cast_fp16_0, y = var_9134_to_fp16)[name = string("out_201_cast_fp16")]; tensor var_9148 = const()[name = string("op_9148"), val = tensor([0, 2, 1])]; tensor input_335_axes_0 = const()[name = string("input_335_axes_0"), val = tensor([2])]; tensor var_9149 = transpose(perm = var_9148, x = out_201_cast_fp16)[name = string("transpose_12")]; tensor input_335 = expand_dims(axes = input_335_axes_0, x = var_9149)[name = string("input_335")]; string gate_65_pad_type_0 = const()[name = string("gate_65_pad_type_0"), val = string("valid")]; tensor gate_65_strides_0 = const()[name = string("gate_65_strides_0"), val = tensor([1, 1])]; tensor gate_65_pad_0 = const()[name = string("gate_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_65_dilations_0 = const()[name = string("gate_65_dilations_0"), val = tensor([1, 1])]; int32 gate_65_groups_0 = const()[name = string("gate_65_groups_0"), val = int32(1)]; tensor gate_65 = conv(dilations = gate_65_dilations_0, groups = gate_65_groups_0, pad = gate_65_pad_0, pad_type = gate_65_pad_type_0, strides = gate_65_strides_0, weight = layers_16_mlp_gate_proj_weight, x = input_335)[name = string("gate_65")]; string up_33_pad_type_0 = const()[name = string("up_33_pad_type_0"), val = string("valid")]; tensor up_33_strides_0 = const()[name = string("up_33_strides_0"), val = tensor([1, 1])]; tensor up_33_pad_0 = const()[name = string("up_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_33_dilations_0 = const()[name = string("up_33_dilations_0"), val = tensor([1, 1])]; int32 up_33_groups_0 = const()[name = string("up_33_groups_0"), val = int32(1)]; tensor up_33 = conv(dilations = up_33_dilations_0, groups = up_33_groups_0, pad = up_33_pad_0, pad_type = up_33_pad_type_0, strides = up_33_strides_0, weight = layers_16_mlp_up_proj_weight, x = input_335)[name = string("up_33")]; string gate_67_mode_0 = const()[name = string("gate_67_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_67 = gelu(mode = gate_67_mode_0, x = gate_65)[name = string("gate_67")]; tensor input_337 = mul(x = gate_67, y = up_33)[name = string("input_337")]; string mlp_out_65_pad_type_0 = const()[name = string("mlp_out_65_pad_type_0"), val = string("valid")]; tensor mlp_out_65_strides_0 = const()[name = string("mlp_out_65_strides_0"), val = tensor([1, 1])]; tensor mlp_out_65_pad_0 = const()[name = string("mlp_out_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_65_dilations_0 = const()[name = string("mlp_out_65_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_65_groups_0 = const()[name = string("mlp_out_65_groups_0"), val = int32(1)]; tensor mlp_out_65 = conv(dilations = mlp_out_65_dilations_0, groups = mlp_out_65_groups_0, pad = mlp_out_65_pad_0, pad_type = mlp_out_65_pad_type_0, strides = mlp_out_65_strides_0, weight = layers_16_mlp_down_proj_weight, x = input_337)[name = string("mlp_out_65")]; tensor var_9189_axes_0 = const()[name = string("op_9189_axes_0"), val = tensor([2])]; tensor var_9189 = squeeze(axes = var_9189_axes_0, x = mlp_out_65)[name = string("op_9189")]; tensor var_9193 = const()[name = string("op_9193"), val = tensor([0, 2, 1])]; int32 var_9200 = const()[name = string("op_9200"), val = int32(-1)]; fp16 const_236_promoted_to_fp16 = const()[name = string("const_236_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_271 = transpose(perm = var_9193, x = var_9189)[name = string("transpose_11")]; tensor var_9206_cast_fp16 = mul(x = x_271, y = const_236_promoted_to_fp16)[name = string("op_9206_cast_fp16")]; bool input_339_interleave_0 = const()[name = string("input_339_interleave_0"), val = bool(false)]; tensor input_339_cast_fp16 = concat(axis = var_9200, interleave = input_339_interleave_0, values = (x_271, var_9206_cast_fp16))[name = string("input_339_cast_fp16")]; tensor normed_473_axes_0 = const()[name = string("normed_473_axes_0"), val = tensor([-1])]; fp16 var_9198_to_fp16 = const()[name = string("op_9198_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_473_cast_fp16 = layer_norm(axes = normed_473_axes_0, epsilon = var_9198_to_fp16, x = input_339_cast_fp16)[name = string("normed_473_cast_fp16")]; tensor var_9211_split_sizes_0 = const()[name = string("op_9211_split_sizes_0"), val = tensor([640, 640])]; int32 var_9211_axis_0 = const()[name = string("op_9211_axis_0"), val = int32(-1)]; tensor var_9211_cast_fp16_0, tensor var_9211_cast_fp16_1 = split(axis = var_9211_axis_0, split_sizes = var_9211_split_sizes_0, x = normed_473_cast_fp16)[name = string("op_9211_cast_fp16")]; tensor var_9215_to_fp16 = const()[name = string("op_9215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543280384)))]; tensor out_203_cast_fp16 = mul(x = var_9211_cast_fp16_0, y = var_9215_to_fp16)[name = string("out_203_cast_fp16")]; tensor x_273_cast_fp16 = add(x = x_267_cast_fp16, y = out_203_cast_fp16)[name = string("x_273_cast_fp16")]; int32 var_9229 = const()[name = string("op_9229"), val = int32(-1)]; fp16 const_238_promoted_to_fp16 = const()[name = string("const_238_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9235_cast_fp16 = mul(x = x_273_cast_fp16, y = const_238_promoted_to_fp16)[name = string("op_9235_cast_fp16")]; bool input_341_interleave_0 = const()[name = string("input_341_interleave_0"), val = bool(false)]; tensor input_341_cast_fp16 = concat(axis = var_9229, interleave = input_341_interleave_0, values = (x_273_cast_fp16, var_9235_cast_fp16))[name = string("input_341_cast_fp16")]; tensor normed_477_axes_0 = const()[name = string("normed_477_axes_0"), val = tensor([-1])]; fp16 var_9227_to_fp16 = const()[name = string("op_9227_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_477_cast_fp16 = layer_norm(axes = normed_477_axes_0, epsilon = var_9227_to_fp16, x = input_341_cast_fp16)[name = string("normed_477_cast_fp16")]; tensor var_9240_split_sizes_0 = const()[name = string("op_9240_split_sizes_0"), val = tensor([640, 640])]; int32 var_9240_axis_0 = const()[name = string("op_9240_axis_0"), val = int32(-1)]; tensor var_9240_cast_fp16_0, tensor var_9240_cast_fp16_1 = split(axis = var_9240_axis_0, split_sizes = var_9240_split_sizes_0, x = normed_477_cast_fp16)[name = string("op_9240_cast_fp16")]; tensor var_9244_to_fp16 = const()[name = string("op_9244_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543281728)))]; tensor out_205_cast_fp16 = mul(x = var_9240_cast_fp16_0, y = var_9244_to_fp16)[name = string("out_205_cast_fp16")]; tensor var_9258 = const()[name = string("op_9258"), val = tensor([0, 2, 1])]; tensor input_343_axes_0 = const()[name = string("input_343_axes_0"), val = tensor([2])]; tensor var_9259 = transpose(perm = var_9258, x = out_205_cast_fp16)[name = string("transpose_10")]; tensor input_343 = expand_dims(axes = input_343_axes_0, x = var_9259)[name = string("input_343")]; string var_9272_pad_type_0 = const()[name = string("op_9272_pad_type_0"), val = string("valid")]; tensor var_9272_strides_0 = const()[name = string("op_9272_strides_0"), val = tensor([1, 1])]; tensor var_9272_pad_0 = const()[name = string("op_9272_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9272_dilations_0 = const()[name = string("op_9272_dilations_0"), val = tensor([1, 1])]; int32 var_9272_groups_0 = const()[name = string("op_9272_groups_0"), val = int32(1)]; tensor var_9272 = conv(dilations = var_9272_dilations_0, groups = var_9272_groups_0, pad = var_9272_pad_0, pad_type = var_9272_pad_type_0, strides = var_9272_strides_0, weight = layers_17_self_attn_q_proj_weight, x = input_343)[name = string("op_9272")]; tensor var_9277 = const()[name = string("op_9277"), val = tensor([1, 4, 256, 1])]; tensor var_9278 = reshape(shape = var_9277, x = var_9272)[name = string("op_9278")]; tensor var_9283 = const()[name = string("op_9283"), val = tensor([0, 1, 3, 2])]; tensor var_9288 = const()[name = string("op_9288"), val = tensor([1, 4, 256])]; tensor q_103 = transpose(perm = var_9283, x = var_9278)[name = string("transpose_9")]; tensor x_277 = reshape(shape = var_9288, x = q_103)[name = string("x_277")]; int32 var_9295 = const()[name = string("op_9295"), val = int32(-1)]; fp16 const_240_promoted_to_fp16 = const()[name = string("const_240_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9301_cast_fp16 = mul(x = x_277, y = const_240_promoted_to_fp16)[name = string("op_9301_cast_fp16")]; bool input_345_interleave_0 = const()[name = string("input_345_interleave_0"), val = bool(false)]; tensor input_345_cast_fp16 = concat(axis = var_9295, interleave = input_345_interleave_0, values = (x_277, var_9301_cast_fp16))[name = string("input_345_cast_fp16")]; tensor normed_483_axes_0 = const()[name = string("normed_483_axes_0"), val = tensor([-1])]; fp16 var_9293_to_fp16 = const()[name = string("op_9293_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_483_cast_fp16 = layer_norm(axes = normed_483_axes_0, epsilon = var_9293_to_fp16, x = input_345_cast_fp16)[name = string("normed_483_cast_fp16")]; tensor var_9306_split_sizes_0 = const()[name = string("op_9306_split_sizes_0"), val = tensor([256, 256])]; int32 var_9306_axis_0 = const()[name = string("op_9306_axis_0"), val = int32(-1)]; tensor var_9306_cast_fp16_0, tensor var_9306_cast_fp16_1 = split(axis = var_9306_axis_0, split_sizes = var_9306_split_sizes_0, x = normed_483_cast_fp16)[name = string("op_9306_cast_fp16")]; tensor var_9310_to_fp16 = const()[name = string("op_9310_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543283072)))]; tensor out_207_cast_fp16 = mul(x = var_9306_cast_fp16_0, y = var_9310_to_fp16)[name = string("out_207_cast_fp16")]; tensor var_9317 = const()[name = string("op_9317"), val = tensor([1, 4, 1, 256])]; tensor q_105 = reshape(shape = var_9317, x = out_207_cast_fp16)[name = string("q_105")]; string var_9329_pad_type_0 = const()[name = string("op_9329_pad_type_0"), val = string("valid")]; tensor var_9329_strides_0 = const()[name = string("op_9329_strides_0"), val = tensor([1, 1])]; tensor var_9329_pad_0 = const()[name = string("op_9329_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9329_dilations_0 = const()[name = string("op_9329_dilations_0"), val = tensor([1, 1])]; int32 var_9329_groups_0 = const()[name = string("op_9329_groups_0"), val = int32(1)]; tensor var_9329 = conv(dilations = var_9329_dilations_0, groups = var_9329_groups_0, pad = var_9329_pad_0, pad_type = var_9329_pad_type_0, strides = var_9329_strides_0, weight = layers_17_self_attn_k_proj_weight, x = input_343)[name = string("op_9329")]; tensor var_9334 = const()[name = string("op_9334"), val = tensor([1, 1, 256, 1])]; tensor var_9335 = reshape(shape = var_9334, x = var_9329)[name = string("op_9335")]; tensor var_9340 = const()[name = string("op_9340"), val = tensor([0, 1, 3, 2])]; tensor var_9345 = const()[name = string("op_9345"), val = tensor([1, 1, 256])]; tensor k_103 = transpose(perm = var_9340, x = var_9335)[name = string("transpose_8")]; tensor x_279 = reshape(shape = var_9345, x = k_103)[name = string("x_279")]; int32 var_9352 = const()[name = string("op_9352"), val = int32(-1)]; fp16 const_242_promoted_to_fp16 = const()[name = string("const_242_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9358_cast_fp16 = mul(x = x_279, y = const_242_promoted_to_fp16)[name = string("op_9358_cast_fp16")]; bool input_347_interleave_0 = const()[name = string("input_347_interleave_0"), val = bool(false)]; tensor input_347_cast_fp16 = concat(axis = var_9352, interleave = input_347_interleave_0, values = (x_279, var_9358_cast_fp16))[name = string("input_347_cast_fp16")]; tensor normed_487_axes_0 = const()[name = string("normed_487_axes_0"), val = tensor([-1])]; fp16 var_9350_to_fp16 = const()[name = string("op_9350_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_487_cast_fp16 = layer_norm(axes = normed_487_axes_0, epsilon = var_9350_to_fp16, x = input_347_cast_fp16)[name = string("normed_487_cast_fp16")]; tensor var_9363_split_sizes_0 = const()[name = string("op_9363_split_sizes_0"), val = tensor([256, 256])]; int32 var_9363_axis_0 = const()[name = string("op_9363_axis_0"), val = int32(-1)]; tensor var_9363_cast_fp16_0, tensor var_9363_cast_fp16_1 = split(axis = var_9363_axis_0, split_sizes = var_9363_split_sizes_0, x = normed_487_cast_fp16)[name = string("op_9363_cast_fp16")]; tensor var_9367_to_fp16 = const()[name = string("op_9367_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543283648)))]; tensor out_209_cast_fp16 = mul(x = var_9363_cast_fp16_0, y = var_9367_to_fp16)[name = string("out_209_cast_fp16")]; tensor var_9374 = const()[name = string("op_9374"), val = tensor([1, 1, 1, 256])]; tensor k_105 = reshape(shape = var_9374, x = out_209_cast_fp16)[name = string("k_105")]; string var_9386_pad_type_0 = const()[name = string("op_9386_pad_type_0"), val = string("valid")]; tensor var_9386_strides_0 = const()[name = string("op_9386_strides_0"), val = tensor([1, 1])]; tensor var_9386_pad_0 = const()[name = string("op_9386_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9386_dilations_0 = const()[name = string("op_9386_dilations_0"), val = tensor([1, 1])]; int32 var_9386_groups_0 = const()[name = string("op_9386_groups_0"), val = int32(1)]; tensor var_9386 = conv(dilations = var_9386_dilations_0, groups = var_9386_groups_0, pad = var_9386_pad_0, pad_type = var_9386_pad_type_0, strides = var_9386_strides_0, weight = layers_17_self_attn_v_proj_weight, x = input_343)[name = string("op_9386")]; tensor var_9391 = const()[name = string("op_9391"), val = tensor([1, 1, 256, 1])]; tensor var_9392 = reshape(shape = var_9391, x = var_9386)[name = string("op_9392")]; tensor var_9397 = const()[name = string("op_9397"), val = tensor([0, 1, 3, 2])]; tensor var_9399 = mul(x = q_105, y = cos)[name = string("op_9399")]; tensor var_9400_split_sizes_0 = const()[name = string("op_9400_split_sizes_0"), val = tensor([128, 128])]; int32 var_9400_axis_0 = const()[name = string("op_9400_axis_0"), val = int32(-1)]; tensor var_9400_0, tensor var_9400_1 = split(axis = var_9400_axis_0, split_sizes = var_9400_split_sizes_0, x = q_105)[name = string("op_9400")]; fp16 const_244_promoted = const()[name = string("const_244_promoted"), val = fp16(-0x1p+0)]; tensor var_9402 = mul(x = var_9400_1, y = const_244_promoted)[name = string("op_9402")]; int32 var_9404 = const()[name = string("op_9404"), val = int32(-1)]; bool var_9405_interleave_0 = const()[name = string("op_9405_interleave_0"), val = bool(false)]; tensor var_9405 = concat(axis = var_9404, interleave = var_9405_interleave_0, values = (var_9402, var_9400_0))[name = string("op_9405")]; tensor var_9406 = mul(x = var_9405, y = sin)[name = string("op_9406")]; tensor q = add(x = var_9399, y = var_9406)[name = string("q")]; tensor var_9409 = mul(x = k_105, y = cos)[name = string("op_9409")]; tensor var_9410_split_sizes_0 = const()[name = string("op_9410_split_sizes_0"), val = tensor([128, 128])]; int32 var_9410_axis_0 = const()[name = string("op_9410_axis_0"), val = int32(-1)]; tensor var_9410_0, tensor var_9410_1 = split(axis = var_9410_axis_0, split_sizes = var_9410_split_sizes_0, x = k_105)[name = string("op_9410")]; fp16 const_245_promoted = const()[name = string("const_245_promoted"), val = fp16(-0x1p+0)]; tensor var_9412 = mul(x = var_9410_1, y = const_245_promoted)[name = string("op_9412")]; int32 var_9414 = const()[name = string("op_9414"), val = int32(-1)]; bool var_9415_interleave_0 = const()[name = string("op_9415_interleave_0"), val = bool(false)]; tensor var_9415 = concat(axis = var_9414, interleave = var_9415_interleave_0, values = (var_9412, var_9410_0))[name = string("op_9415")]; tensor var_9416 = mul(x = var_9415, y = sin)[name = string("op_9416")]; tensor k = add(x = var_9409, y = var_9416)[name = string("k")]; tensor var_9421_begin_0 = const()[name = string("op_9421_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_9421_end_0 = const()[name = string("op_9421_end_0"), val = tensor([18, 1, 2048, 256])]; tensor var_9421_end_mask_0 = const()[name = string("op_9421_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9421_squeeze_mask_0 = const()[name = string("op_9421_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_9421_cast_fp16 = slice_by_index(begin = var_9421_begin_0, end = var_9421_end_0, end_mask = var_9421_end_mask_0, squeeze_mask = var_9421_squeeze_mask_0, x = coreml_update_state_69)[name = string("op_9421_cast_fp16")]; tensor K_cache_axes_0 = const()[name = string("K_cache_axes_0"), val = tensor([0])]; tensor K_cache_cast_fp16 = expand_dims(axes = K_cache_axes_0, x = var_9421_cast_fp16)[name = string("K_cache_cast_fp16")]; tensor var_9426_begin_0 = const()[name = string("op_9426_begin_0"), val = tensor([35, 0, 0, 0])]; tensor var_9426_end_0 = const()[name = string("op_9426_end_0"), val = tensor([36, 1, 2048, 256])]; tensor var_9426_end_mask_0 = const()[name = string("op_9426_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9426_squeeze_mask_0 = const()[name = string("op_9426_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_9426_cast_fp16 = slice_by_index(begin = var_9426_begin_0, end = var_9426_end_0, end_mask = var_9426_end_mask_0, squeeze_mask = var_9426_squeeze_mask_0, x = coreml_update_state_69)[name = string("op_9426_cast_fp16")]; tensor V_cache_axes_0 = const()[name = string("V_cache_axes_0"), val = tensor([0])]; tensor V_cache_cast_fp16 = expand_dims(axes = V_cache_axes_0, x = var_9426_cast_fp16)[name = string("V_cache_cast_fp16")]; tensor k_broadcast_reps_0 = const()[name = string("k_broadcast_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor k_broadcast = tile(reps = k_broadcast_reps_0, x = k)[name = string("k_broadcast")]; tensor v_broadcast_reps_0 = const()[name = string("v_broadcast_reps_0"), val = tensor([1, 1, 2048, 1])]; tensor v = transpose(perm = var_9397, x = var_9392)[name = string("transpose_7")]; tensor v_broadcast = tile(reps = v_broadcast_reps_0, x = v)[name = string("v_broadcast")]; tensor var_9434_cast_fp16 = mul(x = K_cache_cast_fp16, y = var_1120_cast_fp16)[name = string("op_9434_cast_fp16")]; tensor var_9435_cast_fp16 = mul(x = k_broadcast, y = update_mask)[name = string("op_9435_cast_fp16")]; tensor K_new_cast_fp16 = add(x = var_9434_cast_fp16, y = var_9435_cast_fp16)[name = string("K_new_cast_fp16")]; tensor var_9441_cast_fp16 = mul(x = V_cache_cast_fp16, y = var_1120_cast_fp16)[name = string("op_9441_cast_fp16")]; tensor var_9442_cast_fp16 = mul(x = v_broadcast, y = update_mask)[name = string("op_9442_cast_fp16")]; tensor V_new_cast_fp16 = add(x = var_9441_cast_fp16, y = var_9442_cast_fp16)[name = string("V_new_cast_fp16")]; tensor var_9446_axes_0 = const()[name = string("op_9446_axes_0"), val = tensor([0])]; tensor var_9446_cast_fp16 = squeeze(axes = var_9446_axes_0, x = K_new_cast_fp16)[name = string("op_9446_cast_fp16")]; tensor concat_68 = const()[name = string("concat_68"), val = tensor([17, 0, 0, 0])]; tensor concat_69 = const()[name = string("concat_69"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_35_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_35_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_35_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_68, begin_mask = kv_cache_0_internal_tensor_assign_35_begin_mask_0, end = concat_69, end_mask = kv_cache_0_internal_tensor_assign_35_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_35_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_35_stride_0, update = var_9446_cast_fp16, x = coreml_update_state_69)[name = string("kv_cache_0_internal_tensor_assign_35_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_35_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_70_write_state")]; tensor coreml_update_state_70 = read_state(input = kv_cache_0)[name = string("coreml_update_state_70")]; tensor var_9453_axes_0 = const()[name = string("op_9453_axes_0"), val = tensor([0])]; tensor var_9453_cast_fp16 = squeeze(axes = var_9453_axes_0, x = V_new_cast_fp16)[name = string("op_9453_cast_fp16")]; tensor concat_70 = const()[name = string("concat_70"), val = tensor([35, 0, 0, 0])]; tensor concat_71 = const()[name = string("concat_71"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_36_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_36_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_36_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_70, begin_mask = kv_cache_0_internal_tensor_assign_36_begin_mask_0, end = concat_71, end_mask = kv_cache_0_internal_tensor_assign_36_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_36_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_36_stride_0, update = var_9453_cast_fp16, x = coreml_update_state_70)[name = string("kv_cache_0_internal_tensor_assign_36_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_36_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_71_write_state")]; tensor hidden_states_139_axes_0 = const()[name = string("hidden_states_139_axes_0"), val = tensor([2])]; tensor hidden_states_139_cast_fp16 = expand_dims(axes = hidden_states_139_axes_0, x = K_new_cast_fp16)[name = string("hidden_states_139_cast_fp16")]; tensor var_9466 = const()[name = string("op_9466"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_141_cast_fp16 = tile(reps = var_9466, x = hidden_states_139_cast_fp16)[name = string("hidden_states_141_cast_fp16")]; tensor var_9472 = const()[name = string("op_9472"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_cast_fp16 = reshape(shape = var_9472, x = hidden_states_141_cast_fp16)[name = string("K_expanded_cast_fp16")]; tensor hidden_states_143_axes_0 = const()[name = string("hidden_states_143_axes_0"), val = tensor([2])]; tensor hidden_states_143_cast_fp16 = expand_dims(axes = hidden_states_143_axes_0, x = V_new_cast_fp16)[name = string("hidden_states_143_cast_fp16")]; tensor var_9481 = const()[name = string("op_9481"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_145_cast_fp16 = tile(reps = var_9481, x = hidden_states_143_cast_fp16)[name = string("hidden_states_145_cast_fp16")]; tensor var_9487 = const()[name = string("op_9487"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_cast_fp16 = reshape(shape = var_9487, x = hidden_states_145_cast_fp16)[name = string("V_expanded_cast_fp16")]; bool var_9502_transpose_x_1 = const()[name = string("op_9502_transpose_x_1"), val = bool(false)]; bool var_9502_transpose_y_1 = const()[name = string("op_9502_transpose_y_1"), val = bool(true)]; tensor var_9502_cast_fp16 = matmul(transpose_x = var_9502_transpose_x_1, transpose_y = var_9502_transpose_y_1, x = q, y = K_expanded_cast_fp16)[name = string("op_9502_cast_fp16")]; fp16 var_9503_to_fp16 = const()[name = string("op_9503_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_103_cast_fp16 = mul(x = var_9502_cast_fp16, y = var_9503_to_fp16)[name = string("attn_weights_103_cast_fp16")]; tensor attn_weights_105_cast_fp16 = add(x = attn_weights_103_cast_fp16, y = causal_mask)[name = string("attn_weights_105_cast_fp16")]; int32 var_9512 = const()[name = string("op_9512"), val = int32(-1)]; tensor var_9514_cast_fp16 = softmax(axis = var_9512, x = attn_weights_105_cast_fp16)[name = string("op_9514_cast_fp16")]; bool var_9530_transpose_x_0 = const()[name = string("op_9530_transpose_x_0"), val = bool(false)]; bool var_9530_transpose_y_0 = const()[name = string("op_9530_transpose_y_0"), val = bool(false)]; tensor var_9530_cast_fp16 = matmul(transpose_x = var_9530_transpose_x_0, transpose_y = var_9530_transpose_y_0, x = var_9514_cast_fp16, y = V_expanded_cast_fp16)[name = string("op_9530_cast_fp16")]; tensor var_9540 = const()[name = string("op_9540"), val = tensor([0, 2, 1, 3])]; tensor var_9547 = const()[name = string("op_9547"), val = tensor([1, 1, -1])]; tensor var_9541 = transpose(perm = var_9540, x = var_9530_cast_fp16)[name = string("transpose_6")]; tensor attn_output_105 = reshape(shape = var_9547, x = var_9541)[name = string("attn_output_105")]; tensor var_9552 = const()[name = string("op_9552"), val = tensor([0, 2, 1])]; tensor squeeze_17 = const()[name = string("squeeze_17"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543284224)))]; string var_9568_pad_type_0 = const()[name = string("op_9568_pad_type_0"), val = string("valid")]; int32 var_9568_groups_0 = const()[name = string("op_9568_groups_0"), val = int32(1)]; tensor var_9568_strides_0 = const()[name = string("op_9568_strides_0"), val = tensor([1])]; tensor var_9568_pad_0 = const()[name = string("op_9568_pad_0"), val = tensor([0, 0])]; tensor var_9568_dilations_0 = const()[name = string("op_9568_dilations_0"), val = tensor([1])]; tensor var_9553 = transpose(perm = var_9552, x = attn_output_105)[name = string("transpose_5")]; tensor var_9568 = conv(dilations = var_9568_dilations_0, groups = var_9568_groups_0, pad = var_9568_pad_0, pad_type = var_9568_pad_type_0, strides = var_9568_strides_0, weight = squeeze_17, x = var_9553)[name = string("op_9568")]; tensor var_9572 = const()[name = string("op_9572"), val = tensor([0, 2, 1])]; int32 var_9579 = const()[name = string("op_9579"), val = int32(-1)]; fp16 const_246_promoted_to_fp16 = const()[name = string("const_246_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_281 = transpose(perm = var_9572, x = var_9568)[name = string("transpose_4")]; tensor var_9585_cast_fp16 = mul(x = x_281, y = const_246_promoted_to_fp16)[name = string("op_9585_cast_fp16")]; bool input_351_interleave_0 = const()[name = string("input_351_interleave_0"), val = bool(false)]; tensor input_351_cast_fp16 = concat(axis = var_9579, interleave = input_351_interleave_0, values = (x_281, var_9585_cast_fp16))[name = string("input_351_cast_fp16")]; tensor normed_491_axes_0 = const()[name = string("normed_491_axes_0"), val = tensor([-1])]; fp16 var_9577_to_fp16 = const()[name = string("op_9577_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_491_cast_fp16 = layer_norm(axes = normed_491_axes_0, epsilon = var_9577_to_fp16, x = input_351_cast_fp16)[name = string("normed_491_cast_fp16")]; tensor var_9590_split_sizes_0 = const()[name = string("op_9590_split_sizes_0"), val = tensor([640, 640])]; int32 var_9590_axis_0 = const()[name = string("op_9590_axis_0"), val = int32(-1)]; tensor var_9590_cast_fp16_0, tensor var_9590_cast_fp16_1 = split(axis = var_9590_axis_0, split_sizes = var_9590_split_sizes_0, x = normed_491_cast_fp16)[name = string("op_9590_cast_fp16")]; tensor var_9594_to_fp16 = const()[name = string("op_9594_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(544595008)))]; tensor out_211_cast_fp16 = mul(x = var_9590_cast_fp16_0, y = var_9594_to_fp16)[name = string("out_211_cast_fp16")]; tensor x_283_cast_fp16 = add(x = x_273_cast_fp16, y = out_211_cast_fp16)[name = string("x_283_cast_fp16")]; int32 var_9608 = const()[name = string("op_9608"), val = int32(-1)]; fp16 const_248_promoted_to_fp16 = const()[name = string("const_248_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9614_cast_fp16 = mul(x = x_283_cast_fp16, y = const_248_promoted_to_fp16)[name = string("op_9614_cast_fp16")]; bool input_353_interleave_0 = const()[name = string("input_353_interleave_0"), val = bool(false)]; tensor input_353_cast_fp16 = concat(axis = var_9608, interleave = input_353_interleave_0, values = (x_283_cast_fp16, var_9614_cast_fp16))[name = string("input_353_cast_fp16")]; tensor normed_495_axes_0 = const()[name = string("normed_495_axes_0"), val = tensor([-1])]; fp16 var_9606_to_fp16 = const()[name = string("op_9606_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_495_cast_fp16 = layer_norm(axes = normed_495_axes_0, epsilon = var_9606_to_fp16, x = input_353_cast_fp16)[name = string("normed_495_cast_fp16")]; tensor var_9619_split_sizes_0 = const()[name = string("op_9619_split_sizes_0"), val = tensor([640, 640])]; int32 var_9619_axis_0 = const()[name = string("op_9619_axis_0"), val = int32(-1)]; tensor var_9619_cast_fp16_0, tensor var_9619_cast_fp16_1 = split(axis = var_9619_axis_0, split_sizes = var_9619_split_sizes_0, x = normed_495_cast_fp16)[name = string("op_9619_cast_fp16")]; tensor var_9623_to_fp16 = const()[name = string("op_9623_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(544596352)))]; tensor out_213_cast_fp16 = mul(x = var_9619_cast_fp16_0, y = var_9623_to_fp16)[name = string("out_213_cast_fp16")]; tensor var_9637 = const()[name = string("op_9637"), val = tensor([0, 2, 1])]; tensor input_355_axes_0 = const()[name = string("input_355_axes_0"), val = tensor([2])]; tensor var_9638 = transpose(perm = var_9637, x = out_213_cast_fp16)[name = string("transpose_3")]; tensor input_355 = expand_dims(axes = input_355_axes_0, x = var_9638)[name = string("input_355")]; string gate_69_pad_type_0 = const()[name = string("gate_69_pad_type_0"), val = string("valid")]; tensor gate_69_strides_0 = const()[name = string("gate_69_strides_0"), val = tensor([1, 1])]; tensor gate_69_pad_0 = const()[name = string("gate_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_69_dilations_0 = const()[name = string("gate_69_dilations_0"), val = tensor([1, 1])]; int32 gate_69_groups_0 = const()[name = string("gate_69_groups_0"), val = int32(1)]; tensor gate_69 = conv(dilations = gate_69_dilations_0, groups = gate_69_groups_0, pad = gate_69_pad_0, pad_type = gate_69_pad_type_0, strides = gate_69_strides_0, weight = layers_17_mlp_gate_proj_weight, x = input_355)[name = string("gate_69")]; string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")]; tensor up_strides_0 = const()[name = string("up_strides_0"), val = tensor([1, 1])]; tensor up_pad_0 = const()[name = string("up_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_dilations_0 = const()[name = string("up_dilations_0"), val = tensor([1, 1])]; int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)]; tensor up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = layers_17_mlp_up_proj_weight, x = input_355)[name = string("up")]; string gate_mode_0 = const()[name = string("gate_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate = gelu(mode = gate_mode_0, x = gate_69)[name = string("gate")]; tensor input_357 = mul(x = gate, y = up)[name = string("input_357")]; string mlp_out_69_pad_type_0 = const()[name = string("mlp_out_69_pad_type_0"), val = string("valid")]; tensor mlp_out_69_strides_0 = const()[name = string("mlp_out_69_strides_0"), val = tensor([1, 1])]; tensor mlp_out_69_pad_0 = const()[name = string("mlp_out_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_69_dilations_0 = const()[name = string("mlp_out_69_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_69_groups_0 = const()[name = string("mlp_out_69_groups_0"), val = int32(1)]; tensor mlp_out_69 = conv(dilations = mlp_out_69_dilations_0, groups = mlp_out_69_groups_0, pad = mlp_out_69_pad_0, pad_type = mlp_out_69_pad_type_0, strides = mlp_out_69_strides_0, weight = layers_17_mlp_down_proj_weight, x = input_357)[name = string("mlp_out_69")]; tensor var_9678_axes_0 = const()[name = string("op_9678_axes_0"), val = tensor([2])]; tensor var_9678 = squeeze(axes = var_9678_axes_0, x = mlp_out_69)[name = string("op_9678")]; tensor var_9682 = const()[name = string("op_9682"), val = tensor([0, 2, 1])]; int32 var_9689 = const()[name = string("op_9689"), val = int32(-1)]; fp16 const_250_promoted_to_fp16 = const()[name = string("const_250_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_287 = transpose(perm = var_9682, x = var_9678)[name = string("transpose_2")]; tensor var_9695_cast_fp16 = mul(x = x_287, y = const_250_promoted_to_fp16)[name = string("op_9695_cast_fp16")]; bool input_359_interleave_0 = const()[name = string("input_359_interleave_0"), val = bool(false)]; tensor input_359_cast_fp16 = concat(axis = var_9689, interleave = input_359_interleave_0, values = (x_287, var_9695_cast_fp16))[name = string("input_359_cast_fp16")]; tensor normed_501_axes_0 = const()[name = string("normed_501_axes_0"), val = tensor([-1])]; fp16 var_9687_to_fp16 = const()[name = string("op_9687_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_501_cast_fp16 = layer_norm(axes = normed_501_axes_0, epsilon = var_9687_to_fp16, x = input_359_cast_fp16)[name = string("normed_501_cast_fp16")]; tensor var_9700_split_sizes_0 = const()[name = string("op_9700_split_sizes_0"), val = tensor([640, 640])]; int32 var_9700_axis_0 = const()[name = string("op_9700_axis_0"), val = int32(-1)]; tensor var_9700_cast_fp16_0, tensor var_9700_cast_fp16_1 = split(axis = var_9700_axis_0, split_sizes = var_9700_split_sizes_0, x = normed_501_cast_fp16)[name = string("op_9700_cast_fp16")]; tensor var_9704_to_fp16 = const()[name = string("op_9704_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(544597696)))]; tensor out_215_cast_fp16 = mul(x = var_9700_cast_fp16_0, y = var_9704_to_fp16)[name = string("out_215_cast_fp16")]; tensor x_289_cast_fp16 = add(x = x_283_cast_fp16, y = out_215_cast_fp16)[name = string("x_289_cast_fp16")]; int32 var_9718 = const()[name = string("op_9718"), val = int32(-1)]; fp16 const_252_promoted_to_fp16 = const()[name = string("const_252_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9724_cast_fp16 = mul(x = x_289_cast_fp16, y = const_252_promoted_to_fp16)[name = string("op_9724_cast_fp16")]; bool input_361_interleave_0 = const()[name = string("input_361_interleave_0"), val = bool(false)]; tensor input_361_cast_fp16 = concat(axis = var_9718, interleave = input_361_interleave_0, values = (x_289_cast_fp16, var_9724_cast_fp16))[name = string("input_361_cast_fp16")]; tensor normed_505_axes_0 = const()[name = string("normed_505_axes_0"), val = tensor([-1])]; fp16 var_9716_to_fp16 = const()[name = string("op_9716_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_505_cast_fp16 = layer_norm(axes = normed_505_axes_0, epsilon = var_9716_to_fp16, x = input_361_cast_fp16)[name = string("normed_505_cast_fp16")]; tensor var_9729_split_sizes_0 = const()[name = string("op_9729_split_sizes_0"), val = tensor([640, 640])]; int32 var_9729_axis_0 = const()[name = string("op_9729_axis_0"), val = int32(-1)]; tensor var_9729_cast_fp16_0, tensor var_9729_cast_fp16_1 = split(axis = var_9729_axis_0, split_sizes = var_9729_split_sizes_0, x = normed_505_cast_fp16)[name = string("op_9729_cast_fp16")]; tensor var_9733_to_fp16 = const()[name = string("op_9733_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(544599040)))]; tensor out_cast_fp16 = mul(x = var_9729_cast_fp16_0, y = var_9733_to_fp16)[name = string("out_cast_fp16")]; tensor var_9744 = const()[name = string("op_9744"), val = tensor([0, 2, 1])]; tensor squeeze_18 = const()[name = string("squeeze_18"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(544600384)))]; string var_9760_pad_type_0 = const()[name = string("op_9760_pad_type_0"), val = string("valid")]; int32 var_9760_groups_0 = const()[name = string("op_9760_groups_0"), val = int32(1)]; tensor var_9760_strides_0 = const()[name = string("op_9760_strides_0"), val = tensor([1])]; tensor var_9760_pad_0 = const()[name = string("op_9760_pad_0"), val = tensor([0, 0])]; tensor var_9760_dilations_0 = const()[name = string("op_9760_dilations_0"), val = tensor([1])]; tensor var_9745 = transpose(perm = var_9744, x = out_cast_fp16)[name = string("transpose_1")]; tensor var_9760 = conv(dilations = var_9760_dilations_0, groups = var_9760_groups_0, pad = var_9760_pad_0, pad_type = var_9760_pad_type_0, strides = var_9760_strides_0, weight = squeeze_18, x = var_9745)[name = string("op_9760")]; tensor var_9764 = const()[name = string("op_9764"), val = tensor([0, 2, 1])]; tensor logits_axes_0 = const()[name = string("logits_axes_0"), val = tensor([0])]; tensor logits_1 = transpose(perm = var_9764, x = var_9760)[name = string("transpose_0")]; tensor logits = squeeze(axes = logits_axes_0, x = logits_1)[name = string("logits")]; int32 var_9769 = const()[name = string("op_9769"), val = int32(-1)]; int32 token_id_axis_0 = const()[name = string("token_id_axis_0"), val = int32(-1)]; bool token_id_keep_dims_0 = const()[name = string("token_id_keep_dims_0"), val = bool(false)]; string token_id_output_dtype_0 = const()[name = string("token_id_output_dtype_0"), val = string("int32")]; tensor token_id = reduce_argmax(axis = token_id_axis_0, keep_dims = token_id_keep_dims_0, output_dtype = token_id_output_dtype_0, x = logits)[name = string("token_id")]; tensor var_9771_axes_0 = const()[name = string("op_9771_axes_0"), val = tensor([-1])]; tensor var_9771 = expand_dims(axes = var_9771_axes_0, x = token_id)[name = string("op_9771")]; bool var_9772_validate_indices_0 = const()[name = string("op_9772_validate_indices_0"), val = bool(false)]; tensor var_9772 = gather_along_axis(axis = var_9769, indices = var_9771, validate_indices = var_9772_validate_indices_0, x = logits)[name = string("op_9772")]; tensor var_9773_axes_0 = const()[name = string("op_9773_axes_0"), val = tensor([-1])]; tensor token_logit = squeeze(axes = var_9773_axes_0, x = var_9772)[name = string("op_9773")]; } -> (token_id, token_logit); }