program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})] { func main(tensor causal_mask, tensor input_ids, state> kv_cache_0, tensor position_ids, tensor update_mask) { tensor sin_full_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1048704))))[name = string("sin_full_quantized")]; tensor cos_full_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056960))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2105600))))[name = string("cos_full_quantized")]; tensor sin_sliding_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2113856))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3162496))))[name = string("sin_sliding_quantized")]; tensor cos_sliding_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3170752))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4219392))))[name = string("cos_sliding_quantized")]; tensor layers_0_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4227648))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4883072))))[name = string("layers_0_self_attn_q_proj_weight_quantized")]; tensor layers_0_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4885184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5049088))))[name = string("layers_0_self_attn_k_proj_weight_quantized")]; tensor layers_0_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5049664))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5213568))))[name = string("layers_0_self_attn_v_proj_weight_quantized")]; tensor layers_0_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5214144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6524928))))[name = string("layers_0_mlp_gate_proj_weight_quantized")]; tensor layers_0_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6529088))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7839872))))[name = string("layers_0_mlp_up_proj_weight_quantized")]; tensor layers_0_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7844032))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9154816))))[name = string("layers_0_mlp_down_proj_weight_quantized")]; tensor layers_1_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9156160))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9811584))))[name = string("layers_1_self_attn_q_proj_weight_quantized")]; tensor layers_1_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9813696))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9977600))))[name = string("layers_1_self_attn_k_proj_weight_quantized")]; tensor layers_1_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9978176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10142080))))[name = string("layers_1_self_attn_v_proj_weight_quantized")]; tensor layers_1_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10142656))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11453440))))[name = string("layers_1_mlp_gate_proj_weight_quantized")]; tensor layers_1_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11457600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12768384))))[name = string("layers_1_mlp_up_proj_weight_quantized")]; tensor layers_1_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12772544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14083328))))[name = string("layers_1_mlp_down_proj_weight_quantized")]; tensor layers_2_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14084672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14740096))))[name = string("layers_2_self_attn_q_proj_weight_quantized")]; tensor layers_2_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14742208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14906112))))[name = string("layers_2_self_attn_k_proj_weight_quantized")]; tensor layers_2_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14906688))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15070592))))[name = string("layers_2_self_attn_v_proj_weight_quantized")]; tensor layers_2_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15071168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16381952))))[name = string("layers_2_mlp_gate_proj_weight_quantized")]; tensor layers_2_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16386112))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17696896))))[name = string("layers_2_mlp_up_proj_weight_quantized")]; tensor layers_2_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17701056))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19011840))))[name = string("layers_2_mlp_down_proj_weight_quantized")]; tensor layers_3_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19013184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19668608))))[name = string("layers_3_self_attn_q_proj_weight_quantized")]; tensor layers_3_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19670720))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19834624))))[name = string("layers_3_self_attn_k_proj_weight_quantized")]; tensor layers_3_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19835200))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19999104))))[name = string("layers_3_self_attn_v_proj_weight_quantized")]; tensor layers_3_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19999680))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21310464))))[name = string("layers_3_mlp_gate_proj_weight_quantized")]; tensor layers_3_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21314624))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22625408))))[name = string("layers_3_mlp_up_proj_weight_quantized")]; tensor layers_3_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22629568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23940352))))[name = string("layers_3_mlp_down_proj_weight_quantized")]; tensor layers_4_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23941696))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24597120))))[name = string("layers_4_self_attn_q_proj_weight_quantized")]; tensor layers_4_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24599232))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24763136))))[name = string("layers_4_self_attn_k_proj_weight_quantized")]; tensor layers_4_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24763712))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24927616))))[name = string("layers_4_self_attn_v_proj_weight_quantized")]; tensor layers_4_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24928192))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26238976))))[name = string("layers_4_mlp_gate_proj_weight_quantized")]; tensor layers_4_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26243136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27553920))))[name = string("layers_4_mlp_up_proj_weight_quantized")]; tensor layers_4_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27558080))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28868864))))[name = string("layers_4_mlp_down_proj_weight_quantized")]; tensor layers_5_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28870208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29525632))))[name = string("layers_5_self_attn_q_proj_weight_quantized")]; tensor layers_5_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29527744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29691648))))[name = string("layers_5_self_attn_k_proj_weight_quantized")]; tensor layers_5_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29692224))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29856128))))[name = string("layers_5_self_attn_v_proj_weight_quantized")]; tensor layers_5_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29856704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31167488))))[name = string("layers_5_mlp_gate_proj_weight_quantized")]; tensor layers_5_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31171648))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32482432))))[name = string("layers_5_mlp_up_proj_weight_quantized")]; tensor layers_5_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32486592))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33797376))))[name = string("layers_5_mlp_down_proj_weight_quantized")]; tensor layers_6_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33798720))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34454144))))[name = string("layers_6_self_attn_q_proj_weight_quantized")]; tensor layers_6_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34456256))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34620160))))[name = string("layers_6_self_attn_k_proj_weight_quantized")]; tensor layers_6_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34620736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34784640))))[name = string("layers_6_self_attn_v_proj_weight_quantized")]; tensor layers_6_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34785216))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36096000))))[name = string("layers_6_mlp_gate_proj_weight_quantized")]; tensor layers_6_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36100160))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37410944))))[name = string("layers_6_mlp_up_proj_weight_quantized")]; tensor layers_6_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37415104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38725888))))[name = string("layers_6_mlp_down_proj_weight_quantized")]; tensor layers_7_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38727232))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39382656))))[name = string("layers_7_self_attn_q_proj_weight_quantized")]; tensor layers_7_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39384768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39548672))))[name = string("layers_7_self_attn_k_proj_weight_quantized")]; tensor layers_7_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39549248))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39713152))))[name = string("layers_7_self_attn_v_proj_weight_quantized")]; tensor layers_7_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39713728))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41024512))))[name = string("layers_7_mlp_gate_proj_weight_quantized")]; tensor layers_7_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41028672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42339456))))[name = string("layers_7_mlp_up_proj_weight_quantized")]; tensor layers_7_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42343616))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43654400))))[name = string("layers_7_mlp_down_proj_weight_quantized")]; tensor layers_8_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43655744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44311168))))[name = string("layers_8_self_attn_q_proj_weight_quantized")]; tensor layers_8_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44313280))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44477184))))[name = string("layers_8_self_attn_k_proj_weight_quantized")]; tensor layers_8_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44477760))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44641664))))[name = string("layers_8_self_attn_v_proj_weight_quantized")]; tensor layers_8_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44642240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45953024))))[name = string("layers_8_mlp_gate_proj_weight_quantized")]; tensor layers_8_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45957184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47267968))))[name = string("layers_8_mlp_up_proj_weight_quantized")]; tensor layers_8_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47272128))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48582912))))[name = string("layers_8_mlp_down_proj_weight_quantized")]; tensor layers_9_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48584256))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49239680))))[name = string("layers_9_self_attn_q_proj_weight_quantized")]; tensor layers_9_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49241792))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49405696))))[name = string("layers_9_self_attn_k_proj_weight_quantized")]; tensor layers_9_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49406272))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49570176))))[name = string("layers_9_self_attn_v_proj_weight_quantized")]; tensor layers_9_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49570752))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50881536))))[name = string("layers_9_mlp_gate_proj_weight_quantized")]; tensor layers_9_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50885696))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52196480))))[name = string("layers_9_mlp_up_proj_weight_quantized")]; tensor layers_9_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52200640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53511424))))[name = string("layers_9_mlp_down_proj_weight_quantized")]; tensor layers_10_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53512768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54168192))))[name = string("layers_10_self_attn_q_proj_weight_quantized")]; tensor layers_10_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54170304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54334208))))[name = string("layers_10_self_attn_k_proj_weight_quantized")]; tensor layers_10_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54334784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54498688))))[name = string("layers_10_self_attn_v_proj_weight_quantized")]; tensor layers_10_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54499264))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55810048))))[name = string("layers_10_mlp_gate_proj_weight_quantized")]; tensor layers_10_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55814208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57124992))))[name = string("layers_10_mlp_up_proj_weight_quantized")]; tensor layers_10_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57129152))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58439936))))[name = string("layers_10_mlp_down_proj_weight_quantized")]; tensor layers_11_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58441280))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59096704))))[name = string("layers_11_self_attn_q_proj_weight_quantized")]; tensor layers_11_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59098816))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59262720))))[name = string("layers_11_self_attn_k_proj_weight_quantized")]; tensor layers_11_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59263296))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59427200))))[name = string("layers_11_self_attn_v_proj_weight_quantized")]; tensor layers_11_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59427776))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60738560))))[name = string("layers_11_mlp_gate_proj_weight_quantized")]; tensor layers_11_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60742720))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62053504))))[name = string("layers_11_mlp_up_proj_weight_quantized")]; tensor layers_11_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62057664))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63368448))))[name = string("layers_11_mlp_down_proj_weight_quantized")]; tensor layers_12_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63369792))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64025216))))[name = string("layers_12_self_attn_q_proj_weight_quantized")]; tensor layers_12_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64027328))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64191232))))[name = string("layers_12_self_attn_k_proj_weight_quantized")]; tensor layers_12_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64191808))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64355712))))[name = string("layers_12_self_attn_v_proj_weight_quantized")]; tensor layers_12_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64356288))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65667072))))[name = string("layers_12_mlp_gate_proj_weight_quantized")]; tensor layers_12_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65671232))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66982016))))[name = string("layers_12_mlp_up_proj_weight_quantized")]; tensor layers_12_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66986176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68296960))))[name = string("layers_12_mlp_down_proj_weight_quantized")]; tensor layers_13_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68298304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68953728))))[name = string("layers_13_self_attn_q_proj_weight_quantized")]; tensor layers_13_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68955840))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69119744))))[name = string("layers_13_self_attn_k_proj_weight_quantized")]; tensor layers_13_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69120320))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69284224))))[name = string("layers_13_self_attn_v_proj_weight_quantized")]; tensor layers_13_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69284800))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70595584))))[name = string("layers_13_mlp_gate_proj_weight_quantized")]; tensor layers_13_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70599744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71910528))))[name = string("layers_13_mlp_up_proj_weight_quantized")]; tensor layers_13_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71914688))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73225472))))[name = string("layers_13_mlp_down_proj_weight_quantized")]; tensor layers_14_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73226816))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73882240))))[name = string("layers_14_self_attn_q_proj_weight_quantized")]; tensor layers_14_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73884352))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74048256))))[name = string("layers_14_self_attn_k_proj_weight_quantized")]; tensor layers_14_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74048832))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74212736))))[name = string("layers_14_self_attn_v_proj_weight_quantized")]; tensor layers_14_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74213312))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75524096))))[name = string("layers_14_mlp_gate_proj_weight_quantized")]; tensor layers_14_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75528256))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76839040))))[name = string("layers_14_mlp_up_proj_weight_quantized")]; tensor layers_14_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76843200))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78153984))))[name = string("layers_14_mlp_down_proj_weight_quantized")]; tensor layers_15_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78155328))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78810752))))[name = string("layers_15_self_attn_q_proj_weight_quantized")]; tensor layers_15_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78812864))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78976768))))[name = string("layers_15_self_attn_k_proj_weight_quantized")]; tensor layers_15_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78977344))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79141248))))[name = string("layers_15_self_attn_v_proj_weight_quantized")]; tensor layers_15_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79141824))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80452608))))[name = string("layers_15_mlp_gate_proj_weight_quantized")]; tensor layers_15_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80456768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81767552))))[name = string("layers_15_mlp_up_proj_weight_quantized")]; tensor layers_15_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81771712))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83082496))))[name = string("layers_15_mlp_down_proj_weight_quantized")]; tensor layers_16_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83083840))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83739264))))[name = string("layers_16_self_attn_q_proj_weight_quantized")]; tensor layers_16_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83741376))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83905280))))[name = string("layers_16_self_attn_k_proj_weight_quantized")]; tensor layers_16_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83905856))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84069760))))[name = string("layers_16_self_attn_v_proj_weight_quantized")]; tensor layers_16_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84070336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85381120))))[name = string("layers_16_mlp_gate_proj_weight_quantized")]; tensor layers_16_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85385280))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86696064))))[name = string("layers_16_mlp_up_proj_weight_quantized")]; tensor layers_16_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86700224))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88011008))))[name = string("layers_16_mlp_down_proj_weight_quantized")]; tensor layers_17_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88012352))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88667776))))[name = string("layers_17_self_attn_q_proj_weight_quantized")]; tensor layers_17_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88669888))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88833792))))[name = string("layers_17_self_attn_k_proj_weight_quantized")]; tensor layers_17_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88834368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88998272))))[name = string("layers_17_self_attn_v_proj_weight_quantized")]; tensor layers_17_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88998848))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90309632))))[name = string("layers_17_mlp_gate_proj_weight_quantized")]; tensor layers_17_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90313792))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91624576))))[name = string("layers_17_mlp_up_proj_weight_quantized")]; tensor layers_17_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91628736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92939520))))[name = string("layers_17_mlp_down_proj_weight_quantized")]; int32 var_880_batch_dims_0 = const()[name = string("op_880_batch_dims_0"), val = int32(0)]; bool var_880_validate_indices_0 = const()[name = string("op_880_validate_indices_0"), val = bool(false)]; tensor embed_tokens_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92940864))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260713088))))[name = string("embed_tokens_weight_to_fp16_quantized")]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = input_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(262144)]; tensor add_0 = add(x = input_ids, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = input_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)]; tensor greater_equal_0_1 = greater_equal(x = select_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")]; int32 slice_by_index_0_1 = const()[name = string("slice_by_index_0_1"), val = int32(262144)]; tensor add_0_1 = add(x = select_0, y = slice_by_index_0_1)[name = string("add_0_1")]; tensor select_0_1 = select(a = select_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")]; int32 op_880_cast_fp16_axis_0 = const()[name = string("op_880_cast_fp16_axis_0"), val = int32(0)]; tensor op_880_cast_fp16 = gather(axis = op_880_cast_fp16_axis_0, batch_dims = var_880_batch_dims_0, indices = select_0_1, validate_indices = var_880_validate_indices_0, x = embed_tokens_weight_to_fp16_quantized)[name = string("op_880_cast_fp16")]; fp16 var_886_to_fp16 = const()[name = string("op_886_to_fp16"), val = fp16(0x1.94cp+4)]; tensor x_1_cast_fp16 = mul(x = op_880_cast_fp16, y = var_886_to_fp16)[name = string("x_1_cast_fp16")]; int32 var_888 = const()[name = string("op_888"), val = int32(0)]; int32 var_889_batch_dims_0 = const()[name = string("op_889_batch_dims_0"), val = int32(0)]; bool var_889_validate_indices_0 = const()[name = string("op_889_validate_indices_0"), val = bool(false)]; string position_ids_to_uint16_dtype_0 = const()[name = string("position_ids_to_uint16_dtype_0"), val = string("uint16")]; tensor position_ids_to_uint16 = cast(dtype = position_ids_to_uint16_dtype_0, x = position_ids)[name = string("cast_0")]; tensor var_889_cast_uint16 = gather(axis = var_888, batch_dims = var_889_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_889_validate_indices_0, x = cos_sliding_quantized)[name = string("op_889_cast_uint16")]; tensor var_891_axes_0 = const()[name = string("op_891_axes_0"), val = tensor([0])]; tensor var_891 = expand_dims(axes = var_891_axes_0, x = var_889_cast_uint16)[name = string("op_891")]; tensor cos_1_axes_0 = const()[name = string("cos_1_axes_0"), val = tensor([0])]; tensor cos_1 = expand_dims(axes = cos_1_axes_0, x = var_891)[name = string("cos_1")]; int32 var_894 = const()[name = string("op_894"), val = int32(0)]; int32 var_895_batch_dims_0 = const()[name = string("op_895_batch_dims_0"), val = int32(0)]; bool var_895_validate_indices_0 = const()[name = string("op_895_validate_indices_0"), val = bool(false)]; tensor var_895_cast_uint16 = gather(axis = var_894, batch_dims = var_895_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_895_validate_indices_0, x = sin_sliding_quantized)[name = string("op_895_cast_uint16")]; tensor var_897_axes_0 = const()[name = string("op_897_axes_0"), val = tensor([0])]; tensor var_897 = expand_dims(axes = var_897_axes_0, x = var_895_cast_uint16)[name = string("op_897")]; tensor sin_1_axes_0 = const()[name = string("sin_1_axes_0"), val = tensor([0])]; tensor sin_1 = expand_dims(axes = sin_1_axes_0, x = var_897)[name = string("sin_1")]; int32 var_900 = const()[name = string("op_900"), val = int32(0)]; int32 var_901_batch_dims_0 = const()[name = string("op_901_batch_dims_0"), val = int32(0)]; bool var_901_validate_indices_0 = const()[name = string("op_901_validate_indices_0"), val = bool(false)]; tensor var_901_cast_uint16 = gather(axis = var_900, batch_dims = var_901_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_901_validate_indices_0, x = cos_full_quantized)[name = string("op_901_cast_uint16")]; tensor var_903_axes_0 = const()[name = string("op_903_axes_0"), val = tensor([0])]; tensor var_903 = expand_dims(axes = var_903_axes_0, x = var_901_cast_uint16)[name = string("op_903")]; tensor cos_axes_0 = const()[name = string("cos_axes_0"), val = tensor([0])]; tensor cos = expand_dims(axes = cos_axes_0, x = var_903)[name = string("cos")]; int32 var_906 = const()[name = string("op_906"), val = int32(0)]; int32 var_907_batch_dims_0 = const()[name = string("op_907_batch_dims_0"), val = int32(0)]; bool var_907_validate_indices_0 = const()[name = string("op_907_validate_indices_0"), val = bool(false)]; tensor var_907_cast_uint16 = gather(axis = var_906, batch_dims = var_907_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_907_validate_indices_0, x = sin_full_quantized)[name = string("op_907_cast_uint16")]; tensor var_909_axes_0 = const()[name = string("op_909_axes_0"), val = tensor([0])]; tensor var_909 = expand_dims(axes = var_909_axes_0, x = var_907_cast_uint16)[name = string("op_909")]; tensor sin_axes_0 = const()[name = string("sin_axes_0"), val = tensor([0])]; tensor sin = expand_dims(axes = sin_axes_0, x = var_909)[name = string("sin")]; tensor var_916_axes_0 = const()[name = string("op_916_axes_0"), val = tensor([-1])]; bool var_916_keep_dims_0 = const()[name = string("op_916_keep_dims_0"), val = bool(true)]; tensor var_916_cast_fp16 = reduce_sum(axes = var_916_axes_0, keep_dims = var_916_keep_dims_0, x = update_mask)[name = string("op_916_cast_fp16")]; int32 var_921 = const()[name = string("op_921"), val = int32(-1)]; fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_927_cast_fp16 = mul(x = x_1_cast_fp16, y = const_0_promoted_to_fp16)[name = string("op_927_cast_fp16")]; bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; tensor input_1_cast_fp16 = concat(axis = var_921, interleave = input_1_interleave_0, values = (x_1_cast_fp16, var_927_cast_fp16))[name = string("input_1_cast_fp16")]; tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; fp16 var_919_to_fp16 = const()[name = string("op_919_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_919_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; tensor var_932_split_sizes_0 = const()[name = string("op_932_split_sizes_0"), val = tensor([640, 640])]; int32 var_932_axis_0 = const()[name = string("op_932_axis_0"), val = int32(-1)]; tensor var_932_cast_fp16_0, tensor var_932_cast_fp16_1 = split(axis = var_932_axis_0, split_sizes = var_932_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_932_cast_fp16")]; tensor var_936_to_fp16 = const()[name = string("op_936_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261237440)))]; tensor out_1_cast_fp16 = mul(x = var_932_cast_fp16_0, y = var_936_to_fp16)[name = string("out_1_cast_fp16")]; tensor var_950 = const()[name = string("op_950"), val = tensor([0, 2, 1])]; tensor input_3_axes_0 = const()[name = string("input_3_axes_0"), val = tensor([2])]; tensor var_951 = transpose(perm = var_950, x = out_1_cast_fp16)[name = string("transpose_145")]; tensor input_3 = expand_dims(axes = input_3_axes_0, x = var_951)[name = string("input_3")]; string var_964_pad_type_0 = const()[name = string("op_964_pad_type_0"), val = string("valid")]; tensor var_964_strides_0 = const()[name = string("op_964_strides_0"), val = tensor([1, 1])]; tensor var_964_pad_0 = const()[name = string("op_964_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_964_dilations_0 = const()[name = string("op_964_dilations_0"), val = tensor([1, 1])]; int32 var_964_groups_0 = const()[name = string("op_964_groups_0"), val = int32(1)]; tensor var_964 = conv(dilations = var_964_dilations_0, groups = var_964_groups_0, pad = var_964_pad_0, pad_type = var_964_pad_type_0, strides = var_964_strides_0, weight = layers_0_self_attn_q_proj_weight_quantized, x = input_3)[name = string("op_964")]; tensor var_969 = const()[name = string("op_969"), val = tensor([1, 4, 256, 32])]; tensor var_970 = reshape(shape = var_969, x = var_964)[name = string("op_970")]; tensor var_975 = const()[name = string("op_975"), val = tensor([0, 1, 3, 2])]; int32 var_988 = const()[name = string("op_988"), val = int32(-1)]; fp16 const_2_promoted_to_fp16 = const()[name = string("const_2_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_1 = transpose(perm = var_975, x = var_970)[name = string("transpose_144")]; tensor var_994_cast_fp16 = mul(x = q_1, y = const_2_promoted_to_fp16)[name = string("op_994_cast_fp16")]; bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; tensor input_5_cast_fp16 = concat(axis = var_988, interleave = input_5_interleave_0, values = (q_1, var_994_cast_fp16))[name = string("input_5_cast_fp16")]; tensor normed_7_axes_0 = const()[name = string("normed_7_axes_0"), val = tensor([-1])]; fp16 var_986_to_fp16 = const()[name = string("op_986_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_7_cast_fp16 = layer_norm(axes = normed_7_axes_0, epsilon = var_986_to_fp16, x = input_5_cast_fp16)[name = string("normed_7_cast_fp16")]; tensor var_999_split_sizes_0 = const()[name = string("op_999_split_sizes_0"), val = tensor([256, 256])]; int32 var_999_axis_0 = const()[name = string("op_999_axis_0"), val = int32(-1)]; tensor var_999_cast_fp16_0, tensor var_999_cast_fp16_1 = split(axis = var_999_axis_0, split_sizes = var_999_split_sizes_0, x = normed_7_cast_fp16)[name = string("op_999_cast_fp16")]; tensor var_1003_to_fp16 = const()[name = string("op_1003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261238784)))]; tensor out_3_cast_fp16 = mul(x = var_999_cast_fp16_0, y = var_1003_to_fp16)[name = string("out_3_cast_fp16")]; string var_1016_pad_type_0 = const()[name = string("op_1016_pad_type_0"), val = string("valid")]; tensor var_1016_strides_0 = const()[name = string("op_1016_strides_0"), val = tensor([1, 1])]; tensor var_1016_pad_0 = const()[name = string("op_1016_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1016_dilations_0 = const()[name = string("op_1016_dilations_0"), val = tensor([1, 1])]; int32 var_1016_groups_0 = const()[name = string("op_1016_groups_0"), val = int32(1)]; tensor var_1016 = conv(dilations = var_1016_dilations_0, groups = var_1016_groups_0, pad = var_1016_pad_0, pad_type = var_1016_pad_type_0, strides = var_1016_strides_0, weight = layers_0_self_attn_k_proj_weight_quantized, x = input_3)[name = string("op_1016")]; tensor var_1021 = const()[name = string("op_1021"), val = tensor([1, 1, 256, 32])]; tensor var_1022 = reshape(shape = var_1021, x = var_1016)[name = string("op_1022")]; tensor var_1027 = const()[name = string("op_1027"), val = tensor([0, 1, 3, 2])]; int32 var_1040 = const()[name = string("op_1040"), val = int32(-1)]; fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_1 = transpose(perm = var_1027, x = var_1022)[name = string("transpose_143")]; tensor var_1046_cast_fp16 = mul(x = k_1, y = const_4_promoted_to_fp16)[name = string("op_1046_cast_fp16")]; bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; tensor input_7_cast_fp16 = concat(axis = var_1040, interleave = input_7_interleave_0, values = (k_1, var_1046_cast_fp16))[name = string("input_7_cast_fp16")]; tensor normed_11_axes_0 = const()[name = string("normed_11_axes_0"), val = tensor([-1])]; fp16 var_1038_to_fp16 = const()[name = string("op_1038_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_11_cast_fp16 = layer_norm(axes = normed_11_axes_0, epsilon = var_1038_to_fp16, x = input_7_cast_fp16)[name = string("normed_11_cast_fp16")]; tensor var_1051_split_sizes_0 = const()[name = string("op_1051_split_sizes_0"), val = tensor([256, 256])]; int32 var_1051_axis_0 = const()[name = string("op_1051_axis_0"), val = int32(-1)]; tensor var_1051_cast_fp16_0, tensor var_1051_cast_fp16_1 = split(axis = var_1051_axis_0, split_sizes = var_1051_split_sizes_0, x = normed_11_cast_fp16)[name = string("op_1051_cast_fp16")]; tensor var_1055_to_fp16 = const()[name = string("op_1055_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261239360)))]; tensor out_5_cast_fp16 = mul(x = var_1051_cast_fp16_0, y = var_1055_to_fp16)[name = string("out_5_cast_fp16")]; string var_1068_pad_type_0 = const()[name = string("op_1068_pad_type_0"), val = string("valid")]; tensor var_1068_strides_0 = const()[name = string("op_1068_strides_0"), val = tensor([1, 1])]; tensor var_1068_pad_0 = const()[name = string("op_1068_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1068_dilations_0 = const()[name = string("op_1068_dilations_0"), val = tensor([1, 1])]; int32 var_1068_groups_0 = const()[name = string("op_1068_groups_0"), val = int32(1)]; tensor var_1068 = conv(dilations = var_1068_dilations_0, groups = var_1068_groups_0, pad = var_1068_pad_0, pad_type = var_1068_pad_type_0, strides = var_1068_strides_0, weight = layers_0_self_attn_v_proj_weight_quantized, x = input_3)[name = string("op_1068")]; tensor var_1073 = const()[name = string("op_1073"), val = tensor([1, 1, 256, 32])]; tensor var_1074 = reshape(shape = var_1073, x = var_1068)[name = string("op_1074")]; tensor var_1081 = mul(x = out_3_cast_fp16, y = cos_1)[name = string("op_1081")]; tensor var_1082_split_sizes_0 = const()[name = string("op_1082_split_sizes_0"), val = tensor([128, 128])]; int32 var_1082_axis_0 = const()[name = string("op_1082_axis_0"), val = int32(-1)]; tensor var_1082_0, tensor var_1082_1 = split(axis = var_1082_axis_0, split_sizes = var_1082_split_sizes_0, x = out_3_cast_fp16)[name = string("op_1082")]; fp16 const_6_promoted = const()[name = string("const_6_promoted"), val = fp16(-0x1p+0)]; tensor var_1084 = mul(x = var_1082_1, y = const_6_promoted)[name = string("op_1084")]; int32 var_1086 = const()[name = string("op_1086"), val = int32(-1)]; bool var_1087_interleave_0 = const()[name = string("op_1087_interleave_0"), val = bool(false)]; tensor var_1087 = concat(axis = var_1086, interleave = var_1087_interleave_0, values = (var_1084, var_1082_0))[name = string("op_1087")]; tensor var_1088 = mul(x = var_1087, y = sin_1)[name = string("op_1088")]; tensor q_5 = add(x = var_1081, y = var_1088)[name = string("q_5")]; tensor var_1091 = mul(x = out_5_cast_fp16, y = cos_1)[name = string("op_1091")]; tensor var_1092_split_sizes_0 = const()[name = string("op_1092_split_sizes_0"), val = tensor([128, 128])]; int32 var_1092_axis_0 = const()[name = string("op_1092_axis_0"), val = int32(-1)]; tensor var_1092_0, tensor var_1092_1 = split(axis = var_1092_axis_0, split_sizes = var_1092_split_sizes_0, x = out_5_cast_fp16)[name = string("op_1092")]; fp16 const_7_promoted = const()[name = string("const_7_promoted"), val = fp16(-0x1p+0)]; tensor var_1094 = mul(x = var_1092_1, y = const_7_promoted)[name = string("op_1094")]; int32 var_1096 = const()[name = string("op_1096"), val = int32(-1)]; bool var_1097_interleave_0 = const()[name = string("op_1097_interleave_0"), val = bool(false)]; tensor var_1097 = concat(axis = var_1096, interleave = var_1097_interleave_0, values = (var_1094, var_1092_0))[name = string("op_1097")]; tensor var_1098 = mul(x = var_1097, y = sin_1)[name = string("op_1098")]; tensor k_5 = add(x = var_1091, y = var_1098)[name = string("k_5")]; tensor read_state_0 = read_state(input = kv_cache_0)[name = string("read_state_0")]; tensor var_1103_begin_0 = const()[name = string("op_1103_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1103_end_0 = const()[name = string("op_1103_end_0"), val = tensor([1, 1, 2048, 256])]; tensor var_1103_end_mask_0 = const()[name = string("op_1103_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1103_squeeze_mask_0 = const()[name = string("op_1103_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_1103_cast_fp16 = slice_by_index(begin = var_1103_begin_0, end = var_1103_end_0, end_mask = var_1103_end_mask_0, squeeze_mask = var_1103_squeeze_mask_0, x = read_state_0)[name = string("op_1103_cast_fp16")]; tensor K_cache_1_axes_0 = const()[name = string("K_cache_1_axes_0"), val = tensor([0])]; tensor K_cache_1_cast_fp16 = expand_dims(axes = K_cache_1_axes_0, x = var_1103_cast_fp16)[name = string("K_cache_1_cast_fp16")]; tensor var_1108_begin_0 = const()[name = string("op_1108_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_1108_end_0 = const()[name = string("op_1108_end_0"), val = tensor([19, 1, 2048, 256])]; tensor var_1108_end_mask_0 = const()[name = string("op_1108_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1108_squeeze_mask_0 = const()[name = string("op_1108_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_1108_cast_fp16 = slice_by_index(begin = var_1108_begin_0, end = var_1108_end_0, end_mask = var_1108_end_mask_0, squeeze_mask = var_1108_squeeze_mask_0, x = read_state_0)[name = string("op_1108_cast_fp16")]; tensor V_cache_1_axes_0 = const()[name = string("V_cache_1_axes_0"), val = tensor([0])]; tensor V_cache_1_cast_fp16 = expand_dims(axes = V_cache_1_axes_0, x = var_1108_cast_fp16)[name = string("V_cache_1_cast_fp16")]; bool k_increment_1_transpose_x_0 = const()[name = string("k_increment_1_transpose_x_0"), val = bool(false)]; bool k_increment_1_transpose_y_0 = const()[name = string("k_increment_1_transpose_y_0"), val = bool(false)]; tensor k_increment_1 = matmul(transpose_x = k_increment_1_transpose_x_0, transpose_y = k_increment_1_transpose_y_0, x = update_mask, y = k_5)[name = string("k_increment_1")]; bool v_increment_1_transpose_x_1 = const()[name = string("v_increment_1_transpose_x_1"), val = bool(false)]; bool v_increment_1_transpose_y_1 = const()[name = string("v_increment_1_transpose_y_1"), val = bool(true)]; tensor v_increment_1 = matmul(transpose_x = v_increment_1_transpose_x_1, transpose_y = v_increment_1_transpose_y_1, x = update_mask, y = var_1074)[name = string("v_increment_1")]; fp16 var_1123_promoted_to_fp16 = const()[name = string("op_1123_promoted_to_fp16"), val = fp16(0x1p+0)]; tensor var_1125_cast_fp16 = sub(x = var_1123_promoted_to_fp16, y = var_916_cast_fp16)[name = string("op_1125_cast_fp16")]; tensor var_1126_cast_fp16 = mul(x = K_cache_1_cast_fp16, y = var_1125_cast_fp16)[name = string("op_1126_cast_fp16")]; tensor K_new_1_cast_fp16 = add(x = var_1126_cast_fp16, y = k_increment_1)[name = string("K_new_1_cast_fp16")]; tensor var_1132_cast_fp16 = mul(x = V_cache_1_cast_fp16, y = var_1125_cast_fp16)[name = string("op_1132_cast_fp16")]; tensor V_new_1_cast_fp16 = add(x = var_1132_cast_fp16, y = v_increment_1)[name = string("V_new_1_cast_fp16")]; tensor var_1136_axes_0 = const()[name = string("op_1136_axes_0"), val = tensor([0])]; tensor var_1136_cast_fp16 = squeeze(axes = var_1136_axes_0, x = K_new_1_cast_fp16)[name = string("op_1136_cast_fp16")]; tensor concat_0 = const()[name = string("concat_0"), val = tensor([0, 0, 0, 0])]; tensor concat_1 = const()[name = string("concat_1"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_1_stride_0, update = var_1136_cast_fp16, x = read_state_0)[name = string("kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_1_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_36_write_state")]; tensor coreml_update_state_36 = read_state(input = kv_cache_0)[name = string("coreml_update_state_36")]; tensor var_1143_axes_0 = const()[name = string("op_1143_axes_0"), val = tensor([0])]; tensor var_1143_cast_fp16 = squeeze(axes = var_1143_axes_0, x = V_new_1_cast_fp16)[name = string("op_1143_cast_fp16")]; tensor concat_2 = const()[name = string("concat_2"), val = tensor([18, 0, 0, 0])]; tensor concat_3 = const()[name = string("concat_3"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_2, begin_mask = kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_3, end_mask = kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_2_stride_0, update = var_1143_cast_fp16, x = coreml_update_state_36)[name = string("kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_2_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_37_write_state")]; tensor coreml_update_state_37 = read_state(input = kv_cache_0)[name = string("coreml_update_state_37")]; tensor hidden_states_1_axes_0 = const()[name = string("hidden_states_1_axes_0"), val = tensor([2])]; tensor hidden_states_1_cast_fp16 = expand_dims(axes = hidden_states_1_axes_0, x = K_new_1_cast_fp16)[name = string("hidden_states_1_cast_fp16")]; tensor var_1156 = const()[name = string("op_1156"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_3_cast_fp16 = tile(reps = var_1156, x = hidden_states_1_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; tensor var_1162 = const()[name = string("op_1162"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_1_cast_fp16 = reshape(shape = var_1162, x = hidden_states_3_cast_fp16)[name = string("K_expanded_1_cast_fp16")]; tensor hidden_states_5_axes_0 = const()[name = string("hidden_states_5_axes_0"), val = tensor([2])]; tensor hidden_states_5_cast_fp16 = expand_dims(axes = hidden_states_5_axes_0, x = V_new_1_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor var_1171 = const()[name = string("op_1171"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_7_cast_fp16 = tile(reps = var_1171, x = hidden_states_5_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; tensor var_1177 = const()[name = string("op_1177"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_1_cast_fp16 = reshape(shape = var_1177, x = hidden_states_7_cast_fp16)[name = string("V_expanded_1_cast_fp16")]; bool var_1192_transpose_x_1 = const()[name = string("op_1192_transpose_x_1"), val = bool(false)]; bool var_1192_transpose_y_1 = const()[name = string("op_1192_transpose_y_1"), val = bool(true)]; tensor var_1192_cast_fp16 = matmul(transpose_x = var_1192_transpose_x_1, transpose_y = var_1192_transpose_y_1, x = q_5, y = K_expanded_1_cast_fp16)[name = string("op_1192_cast_fp16")]; fp16 var_1193_to_fp16 = const()[name = string("op_1193_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_1_cast_fp16 = mul(x = var_1192_cast_fp16, y = var_1193_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("attn_weights_3_cast_fp16")]; int32 var_1202 = const()[name = string("op_1202"), val = int32(-1)]; tensor var_1204_cast_fp16 = softmax(axis = var_1202, x = attn_weights_3_cast_fp16)[name = string("op_1204_cast_fp16")]; bool var_1220_transpose_x_0 = const()[name = string("op_1220_transpose_x_0"), val = bool(false)]; bool var_1220_transpose_y_0 = const()[name = string("op_1220_transpose_y_0"), val = bool(false)]; tensor var_1220_cast_fp16 = matmul(transpose_x = var_1220_transpose_x_0, transpose_y = var_1220_transpose_y_0, x = var_1204_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("op_1220_cast_fp16")]; tensor var_1230 = const()[name = string("op_1230"), val = tensor([0, 2, 1, 3])]; tensor var_1237 = const()[name = string("op_1237"), val = tensor([1, 32, 1024])]; tensor var_1231 = transpose(perm = var_1230, x = var_1220_cast_fp16)[name = string("transpose_142")]; tensor attn_output_3 = reshape(shape = var_1237, x = var_1231)[name = string("attn_output_3")]; tensor var_1242 = const()[name = string("op_1242"), val = tensor([0, 2, 1])]; tensor squeeze_0_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261239936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261895360))))[name = string("squeeze_0_quantized")]; string var_1258_pad_type_0 = const()[name = string("op_1258_pad_type_0"), val = string("valid")]; int32 var_1258_groups_0 = const()[name = string("op_1258_groups_0"), val = int32(1)]; tensor var_1258_strides_0 = const()[name = string("op_1258_strides_0"), val = tensor([1])]; tensor var_1258_pad_0 = const()[name = string("op_1258_pad_0"), val = tensor([0, 0])]; tensor var_1258_dilations_0 = const()[name = string("op_1258_dilations_0"), val = tensor([1])]; tensor var_1243 = transpose(perm = var_1242, x = attn_output_3)[name = string("transpose_141")]; tensor var_1258 = conv(dilations = var_1258_dilations_0, groups = var_1258_groups_0, pad = var_1258_pad_0, pad_type = var_1258_pad_type_0, strides = var_1258_strides_0, weight = squeeze_0_quantized, x = var_1243)[name = string("op_1258")]; tensor var_1262 = const()[name = string("op_1262"), val = tensor([0, 2, 1])]; int32 var_1269 = const()[name = string("op_1269"), val = int32(-1)]; fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_9 = transpose(perm = var_1262, x = var_1258)[name = string("transpose_140")]; tensor var_1275_cast_fp16 = mul(x = x_9, y = const_8_promoted_to_fp16)[name = string("op_1275_cast_fp16")]; bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; tensor input_11_cast_fp16 = concat(axis = var_1269, interleave = input_11_interleave_0, values = (x_9, var_1275_cast_fp16))[name = string("input_11_cast_fp16")]; tensor normed_15_axes_0 = const()[name = string("normed_15_axes_0"), val = tensor([-1])]; fp16 var_1267_to_fp16 = const()[name = string("op_1267_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_15_cast_fp16 = layer_norm(axes = normed_15_axes_0, epsilon = var_1267_to_fp16, x = input_11_cast_fp16)[name = string("normed_15_cast_fp16")]; tensor var_1280_split_sizes_0 = const()[name = string("op_1280_split_sizes_0"), val = tensor([640, 640])]; int32 var_1280_axis_0 = const()[name = string("op_1280_axis_0"), val = int32(-1)]; tensor var_1280_cast_fp16_0, tensor var_1280_cast_fp16_1 = split(axis = var_1280_axis_0, split_sizes = var_1280_split_sizes_0, x = normed_15_cast_fp16)[name = string("op_1280_cast_fp16")]; tensor var_1284_to_fp16 = const()[name = string("op_1284_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261896704)))]; tensor out_7_cast_fp16 = mul(x = var_1280_cast_fp16_0, y = var_1284_to_fp16)[name = string("out_7_cast_fp16")]; tensor x_11_cast_fp16 = add(x = x_1_cast_fp16, y = out_7_cast_fp16)[name = string("x_11_cast_fp16")]; int32 var_1298 = const()[name = string("op_1298"), val = int32(-1)]; fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1304_cast_fp16 = mul(x = x_11_cast_fp16, y = const_10_promoted_to_fp16)[name = string("op_1304_cast_fp16")]; bool input_13_interleave_0 = const()[name = string("input_13_interleave_0"), val = bool(false)]; tensor input_13_cast_fp16 = concat(axis = var_1298, interleave = input_13_interleave_0, values = (x_11_cast_fp16, var_1304_cast_fp16))[name = string("input_13_cast_fp16")]; tensor normed_19_axes_0 = const()[name = string("normed_19_axes_0"), val = tensor([-1])]; fp16 var_1296_to_fp16 = const()[name = string("op_1296_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_19_cast_fp16 = layer_norm(axes = normed_19_axes_0, epsilon = var_1296_to_fp16, x = input_13_cast_fp16)[name = string("normed_19_cast_fp16")]; tensor var_1309_split_sizes_0 = const()[name = string("op_1309_split_sizes_0"), val = tensor([640, 640])]; int32 var_1309_axis_0 = const()[name = string("op_1309_axis_0"), val = int32(-1)]; tensor var_1309_cast_fp16_0, tensor var_1309_cast_fp16_1 = split(axis = var_1309_axis_0, split_sizes = var_1309_split_sizes_0, x = normed_19_cast_fp16)[name = string("op_1309_cast_fp16")]; tensor var_1313_to_fp16 = const()[name = string("op_1313_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261898048)))]; tensor out_9_cast_fp16 = mul(x = var_1309_cast_fp16_0, y = var_1313_to_fp16)[name = string("out_9_cast_fp16")]; tensor var_1327 = const()[name = string("op_1327"), val = tensor([0, 2, 1])]; tensor input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor([2])]; tensor var_1328 = transpose(perm = var_1327, x = out_9_cast_fp16)[name = string("transpose_139")]; tensor input_15 = expand_dims(axes = input_15_axes_0, x = var_1328)[name = string("input_15")]; string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")]; tensor gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor([1, 1])]; tensor gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor([1, 1])]; int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)]; tensor gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = layers_0_mlp_gate_proj_weight_quantized, x = input_15)[name = string("gate_1")]; string up_1_pad_type_0 = const()[name = string("up_1_pad_type_0"), val = string("valid")]; tensor up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor([1, 1])]; tensor up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_1_dilations_0 = const()[name = string("up_1_dilations_0"), val = tensor([1, 1])]; int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)]; tensor up_1 = conv(dilations = up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = up_1_strides_0, weight = layers_0_mlp_up_proj_weight_quantized, x = input_15)[name = string("up_1")]; string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")]; tensor input_17 = mul(x = gate_3, y = up_1)[name = string("input_17")]; string var_1366_pad_type_0 = const()[name = string("op_1366_pad_type_0"), val = string("valid")]; tensor var_1366_strides_0 = const()[name = string("op_1366_strides_0"), val = tensor([1, 1])]; tensor var_1366_pad_0 = const()[name = string("op_1366_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1366_dilations_0 = const()[name = string("op_1366_dilations_0"), val = tensor([1, 1])]; int32 var_1366_groups_0 = const()[name = string("op_1366_groups_0"), val = int32(1)]; tensor var_1366 = conv(dilations = var_1366_dilations_0, groups = var_1366_groups_0, pad = var_1366_pad_0, pad_type = var_1366_pad_type_0, strides = var_1366_strides_0, weight = layers_0_mlp_down_proj_weight_quantized, x = input_17)[name = string("op_1366")]; tensor var_1368_axes_0 = const()[name = string("op_1368_axes_0"), val = tensor([2])]; tensor var_1368 = squeeze(axes = var_1368_axes_0, x = var_1366)[name = string("op_1368")]; tensor var_1372 = const()[name = string("op_1372"), val = tensor([0, 2, 1])]; int32 var_1379 = const()[name = string("op_1379"), val = int32(-1)]; fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_15 = transpose(perm = var_1372, x = var_1368)[name = string("transpose_138")]; tensor var_1385_cast_fp16 = mul(x = x_15, y = const_12_promoted_to_fp16)[name = string("op_1385_cast_fp16")]; bool input_19_interleave_0 = const()[name = string("input_19_interleave_0"), val = bool(false)]; tensor input_19_cast_fp16 = concat(axis = var_1379, interleave = input_19_interleave_0, values = (x_15, var_1385_cast_fp16))[name = string("input_19_cast_fp16")]; tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; fp16 var_1377_to_fp16 = const()[name = string("op_1377_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_1377_to_fp16, x = input_19_cast_fp16)[name = string("normed_25_cast_fp16")]; tensor var_1390_split_sizes_0 = const()[name = string("op_1390_split_sizes_0"), val = tensor([640, 640])]; int32 var_1390_axis_0 = const()[name = string("op_1390_axis_0"), val = int32(-1)]; tensor var_1390_cast_fp16_0, tensor var_1390_cast_fp16_1 = split(axis = var_1390_axis_0, split_sizes = var_1390_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_1390_cast_fp16")]; tensor var_1394_to_fp16 = const()[name = string("op_1394_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261899392)))]; tensor out_11_cast_fp16 = mul(x = var_1390_cast_fp16_0, y = var_1394_to_fp16)[name = string("out_11_cast_fp16")]; tensor x_17_cast_fp16 = add(x = x_11_cast_fp16, y = out_11_cast_fp16)[name = string("x_17_cast_fp16")]; int32 var_1408 = const()[name = string("op_1408"), val = int32(-1)]; fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1414_cast_fp16 = mul(x = x_17_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1414_cast_fp16")]; bool input_21_interleave_0 = const()[name = string("input_21_interleave_0"), val = bool(false)]; tensor input_21_cast_fp16 = concat(axis = var_1408, interleave = input_21_interleave_0, values = (x_17_cast_fp16, var_1414_cast_fp16))[name = string("input_21_cast_fp16")]; tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; fp16 var_1406_to_fp16 = const()[name = string("op_1406_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_1406_to_fp16, x = input_21_cast_fp16)[name = string("normed_29_cast_fp16")]; tensor var_1419_split_sizes_0 = const()[name = string("op_1419_split_sizes_0"), val = tensor([640, 640])]; int32 var_1419_axis_0 = const()[name = string("op_1419_axis_0"), val = int32(-1)]; tensor var_1419_cast_fp16_0, tensor var_1419_cast_fp16_1 = split(axis = var_1419_axis_0, split_sizes = var_1419_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_1419_cast_fp16")]; tensor var_1423_to_fp16 = const()[name = string("op_1423_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261900736)))]; tensor out_13_cast_fp16 = mul(x = var_1419_cast_fp16_0, y = var_1423_to_fp16)[name = string("out_13_cast_fp16")]; tensor var_1437 = const()[name = string("op_1437"), val = tensor([0, 2, 1])]; tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; tensor var_1438 = transpose(perm = var_1437, x = out_13_cast_fp16)[name = string("transpose_137")]; tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_1438)[name = string("input_23")]; string var_1451_pad_type_0 = const()[name = string("op_1451_pad_type_0"), val = string("valid")]; tensor var_1451_strides_0 = const()[name = string("op_1451_strides_0"), val = tensor([1, 1])]; tensor var_1451_pad_0 = const()[name = string("op_1451_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1451_dilations_0 = const()[name = string("op_1451_dilations_0"), val = tensor([1, 1])]; int32 var_1451_groups_0 = const()[name = string("op_1451_groups_0"), val = int32(1)]; tensor var_1451 = conv(dilations = var_1451_dilations_0, groups = var_1451_groups_0, pad = var_1451_pad_0, pad_type = var_1451_pad_type_0, strides = var_1451_strides_0, weight = layers_1_self_attn_q_proj_weight_quantized, x = input_23)[name = string("op_1451")]; tensor var_1456 = const()[name = string("op_1456"), val = tensor([1, 4, 256, 32])]; tensor var_1457 = reshape(shape = var_1456, x = var_1451)[name = string("op_1457")]; tensor var_1462 = const()[name = string("op_1462"), val = tensor([0, 1, 3, 2])]; int32 var_1475 = const()[name = string("op_1475"), val = int32(-1)]; fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_7 = transpose(perm = var_1462, x = var_1457)[name = string("transpose_136")]; tensor var_1481_cast_fp16 = mul(x = q_7, y = const_16_promoted_to_fp16)[name = string("op_1481_cast_fp16")]; bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; tensor input_25_cast_fp16 = concat(axis = var_1475, interleave = input_25_interleave_0, values = (q_7, var_1481_cast_fp16))[name = string("input_25_cast_fp16")]; tensor normed_35_axes_0 = const()[name = string("normed_35_axes_0"), val = tensor([-1])]; fp16 var_1473_to_fp16 = const()[name = string("op_1473_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_35_cast_fp16 = layer_norm(axes = normed_35_axes_0, epsilon = var_1473_to_fp16, x = input_25_cast_fp16)[name = string("normed_35_cast_fp16")]; tensor var_1486_split_sizes_0 = const()[name = string("op_1486_split_sizes_0"), val = tensor([256, 256])]; int32 var_1486_axis_0 = const()[name = string("op_1486_axis_0"), val = int32(-1)]; tensor var_1486_cast_fp16_0, tensor var_1486_cast_fp16_1 = split(axis = var_1486_axis_0, split_sizes = var_1486_split_sizes_0, x = normed_35_cast_fp16)[name = string("op_1486_cast_fp16")]; tensor var_1490_to_fp16 = const()[name = string("op_1490_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261902080)))]; tensor out_15_cast_fp16 = mul(x = var_1486_cast_fp16_0, y = var_1490_to_fp16)[name = string("out_15_cast_fp16")]; string var_1503_pad_type_0 = const()[name = string("op_1503_pad_type_0"), val = string("valid")]; tensor var_1503_strides_0 = const()[name = string("op_1503_strides_0"), val = tensor([1, 1])]; tensor var_1503_pad_0 = const()[name = string("op_1503_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1503_dilations_0 = const()[name = string("op_1503_dilations_0"), val = tensor([1, 1])]; int32 var_1503_groups_0 = const()[name = string("op_1503_groups_0"), val = int32(1)]; tensor var_1503 = conv(dilations = var_1503_dilations_0, groups = var_1503_groups_0, pad = var_1503_pad_0, pad_type = var_1503_pad_type_0, strides = var_1503_strides_0, weight = layers_1_self_attn_k_proj_weight_quantized, x = input_23)[name = string("op_1503")]; tensor var_1508 = const()[name = string("op_1508"), val = tensor([1, 1, 256, 32])]; tensor var_1509 = reshape(shape = var_1508, x = var_1503)[name = string("op_1509")]; tensor var_1514 = const()[name = string("op_1514"), val = tensor([0, 1, 3, 2])]; int32 var_1527 = const()[name = string("op_1527"), val = int32(-1)]; fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_7 = transpose(perm = var_1514, x = var_1509)[name = string("transpose_135")]; tensor var_1533_cast_fp16 = mul(x = k_7, y = const_18_promoted_to_fp16)[name = string("op_1533_cast_fp16")]; bool input_27_interleave_0 = const()[name = string("input_27_interleave_0"), val = bool(false)]; tensor input_27_cast_fp16 = concat(axis = var_1527, interleave = input_27_interleave_0, values = (k_7, var_1533_cast_fp16))[name = string("input_27_cast_fp16")]; tensor normed_39_axes_0 = const()[name = string("normed_39_axes_0"), val = tensor([-1])]; fp16 var_1525_to_fp16 = const()[name = string("op_1525_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_39_cast_fp16 = layer_norm(axes = normed_39_axes_0, epsilon = var_1525_to_fp16, x = input_27_cast_fp16)[name = string("normed_39_cast_fp16")]; tensor var_1538_split_sizes_0 = const()[name = string("op_1538_split_sizes_0"), val = tensor([256, 256])]; int32 var_1538_axis_0 = const()[name = string("op_1538_axis_0"), val = int32(-1)]; tensor var_1538_cast_fp16_0, tensor var_1538_cast_fp16_1 = split(axis = var_1538_axis_0, split_sizes = var_1538_split_sizes_0, x = normed_39_cast_fp16)[name = string("op_1538_cast_fp16")]; tensor var_1542_to_fp16 = const()[name = string("op_1542_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261902656)))]; tensor out_17_cast_fp16 = mul(x = var_1538_cast_fp16_0, y = var_1542_to_fp16)[name = string("out_17_cast_fp16")]; string var_1555_pad_type_0 = const()[name = string("op_1555_pad_type_0"), val = string("valid")]; tensor var_1555_strides_0 = const()[name = string("op_1555_strides_0"), val = tensor([1, 1])]; tensor var_1555_pad_0 = const()[name = string("op_1555_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1555_dilations_0 = const()[name = string("op_1555_dilations_0"), val = tensor([1, 1])]; int32 var_1555_groups_0 = const()[name = string("op_1555_groups_0"), val = int32(1)]; tensor var_1555 = conv(dilations = var_1555_dilations_0, groups = var_1555_groups_0, pad = var_1555_pad_0, pad_type = var_1555_pad_type_0, strides = var_1555_strides_0, weight = layers_1_self_attn_v_proj_weight_quantized, x = input_23)[name = string("op_1555")]; tensor var_1560 = const()[name = string("op_1560"), val = tensor([1, 1, 256, 32])]; tensor var_1561 = reshape(shape = var_1560, x = var_1555)[name = string("op_1561")]; tensor var_1568 = mul(x = out_15_cast_fp16, y = cos_1)[name = string("op_1568")]; tensor var_1569_split_sizes_0 = const()[name = string("op_1569_split_sizes_0"), val = tensor([128, 128])]; int32 var_1569_axis_0 = const()[name = string("op_1569_axis_0"), val = int32(-1)]; tensor var_1569_0, tensor var_1569_1 = split(axis = var_1569_axis_0, split_sizes = var_1569_split_sizes_0, x = out_15_cast_fp16)[name = string("op_1569")]; fp16 const_20_promoted = const()[name = string("const_20_promoted"), val = fp16(-0x1p+0)]; tensor var_1571 = mul(x = var_1569_1, y = const_20_promoted)[name = string("op_1571")]; int32 var_1573 = const()[name = string("op_1573"), val = int32(-1)]; bool var_1574_interleave_0 = const()[name = string("op_1574_interleave_0"), val = bool(false)]; tensor var_1574 = concat(axis = var_1573, interleave = var_1574_interleave_0, values = (var_1571, var_1569_0))[name = string("op_1574")]; tensor var_1575 = mul(x = var_1574, y = sin_1)[name = string("op_1575")]; tensor q_11 = add(x = var_1568, y = var_1575)[name = string("q_11")]; tensor var_1578 = mul(x = out_17_cast_fp16, y = cos_1)[name = string("op_1578")]; tensor var_1579_split_sizes_0 = const()[name = string("op_1579_split_sizes_0"), val = tensor([128, 128])]; int32 var_1579_axis_0 = const()[name = string("op_1579_axis_0"), val = int32(-1)]; tensor var_1579_0, tensor var_1579_1 = split(axis = var_1579_axis_0, split_sizes = var_1579_split_sizes_0, x = out_17_cast_fp16)[name = string("op_1579")]; fp16 const_21_promoted = const()[name = string("const_21_promoted"), val = fp16(-0x1p+0)]; tensor var_1581 = mul(x = var_1579_1, y = const_21_promoted)[name = string("op_1581")]; int32 var_1583 = const()[name = string("op_1583"), val = int32(-1)]; bool var_1584_interleave_0 = const()[name = string("op_1584_interleave_0"), val = bool(false)]; tensor var_1584 = concat(axis = var_1583, interleave = var_1584_interleave_0, values = (var_1581, var_1579_0))[name = string("op_1584")]; tensor var_1585 = mul(x = var_1584, y = sin_1)[name = string("op_1585")]; tensor k_11 = add(x = var_1578, y = var_1585)[name = string("k_11")]; tensor var_1590_begin_0 = const()[name = string("op_1590_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_1590_end_0 = const()[name = string("op_1590_end_0"), val = tensor([2, 1, 2048, 256])]; tensor var_1590_end_mask_0 = const()[name = string("op_1590_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1590_squeeze_mask_0 = const()[name = string("op_1590_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_1590_cast_fp16 = slice_by_index(begin = var_1590_begin_0, end = var_1590_end_0, end_mask = var_1590_end_mask_0, squeeze_mask = var_1590_squeeze_mask_0, x = coreml_update_state_37)[name = string("op_1590_cast_fp16")]; tensor K_cache_3_axes_0 = const()[name = string("K_cache_3_axes_0"), val = tensor([0])]; tensor K_cache_3_cast_fp16 = expand_dims(axes = K_cache_3_axes_0, x = var_1590_cast_fp16)[name = string("K_cache_3_cast_fp16")]; tensor var_1595_begin_0 = const()[name = string("op_1595_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_1595_end_0 = const()[name = string("op_1595_end_0"), val = tensor([20, 1, 2048, 256])]; tensor var_1595_end_mask_0 = const()[name = string("op_1595_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1595_squeeze_mask_0 = const()[name = string("op_1595_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_1595_cast_fp16 = slice_by_index(begin = var_1595_begin_0, end = var_1595_end_0, end_mask = var_1595_end_mask_0, squeeze_mask = var_1595_squeeze_mask_0, x = coreml_update_state_37)[name = string("op_1595_cast_fp16")]; tensor V_cache_3_axes_0 = const()[name = string("V_cache_3_axes_0"), val = tensor([0])]; tensor V_cache_3_cast_fp16 = expand_dims(axes = V_cache_3_axes_0, x = var_1595_cast_fp16)[name = string("V_cache_3_cast_fp16")]; bool k_increment_3_transpose_x_0 = const()[name = string("k_increment_3_transpose_x_0"), val = bool(false)]; bool k_increment_3_transpose_y_0 = const()[name = string("k_increment_3_transpose_y_0"), val = bool(false)]; tensor k_increment_3 = matmul(transpose_x = k_increment_3_transpose_x_0, transpose_y = k_increment_3_transpose_y_0, x = update_mask, y = k_11)[name = string("k_increment_3")]; bool v_increment_3_transpose_x_1 = const()[name = string("v_increment_3_transpose_x_1"), val = bool(false)]; bool v_increment_3_transpose_y_1 = const()[name = string("v_increment_3_transpose_y_1"), val = bool(true)]; tensor v_increment_3 = matmul(transpose_x = v_increment_3_transpose_x_1, transpose_y = v_increment_3_transpose_y_1, x = update_mask, y = var_1561)[name = string("v_increment_3")]; tensor var_1613_cast_fp16 = mul(x = K_cache_3_cast_fp16, y = var_1125_cast_fp16)[name = string("op_1613_cast_fp16")]; tensor K_new_3_cast_fp16 = add(x = var_1613_cast_fp16, y = k_increment_3)[name = string("K_new_3_cast_fp16")]; tensor var_1619_cast_fp16 = mul(x = V_cache_3_cast_fp16, y = var_1125_cast_fp16)[name = string("op_1619_cast_fp16")]; tensor V_new_3_cast_fp16 = add(x = var_1619_cast_fp16, y = v_increment_3)[name = string("V_new_3_cast_fp16")]; tensor var_1623_axes_0 = const()[name = string("op_1623_axes_0"), val = tensor([0])]; tensor var_1623_cast_fp16 = squeeze(axes = var_1623_axes_0, x = K_new_3_cast_fp16)[name = string("op_1623_cast_fp16")]; tensor concat_4 = const()[name = string("concat_4"), val = tensor([1, 0, 0, 0])]; tensor concat_5 = const()[name = string("concat_5"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_4, begin_mask = kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_5, end_mask = kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_3_stride_0, update = var_1623_cast_fp16, x = coreml_update_state_37)[name = string("kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_3_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_38_write_state")]; tensor coreml_update_state_38 = read_state(input = kv_cache_0)[name = string("coreml_update_state_38")]; tensor var_1630_axes_0 = const()[name = string("op_1630_axes_0"), val = tensor([0])]; tensor var_1630_cast_fp16 = squeeze(axes = var_1630_axes_0, x = V_new_3_cast_fp16)[name = string("op_1630_cast_fp16")]; tensor concat_6 = const()[name = string("concat_6"), val = tensor([19, 0, 0, 0])]; tensor concat_7 = const()[name = string("concat_7"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_6, begin_mask = kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_7, end_mask = kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_4_stride_0, update = var_1630_cast_fp16, x = coreml_update_state_38)[name = string("kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_4_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_39_write_state")]; tensor coreml_update_state_39 = read_state(input = kv_cache_0)[name = string("coreml_update_state_39")]; tensor hidden_states_9_axes_0 = const()[name = string("hidden_states_9_axes_0"), val = tensor([2])]; tensor hidden_states_9_cast_fp16 = expand_dims(axes = hidden_states_9_axes_0, x = K_new_3_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; tensor var_1643 = const()[name = string("op_1643"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_11_cast_fp16 = tile(reps = var_1643, x = hidden_states_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor var_1649 = const()[name = string("op_1649"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_3_cast_fp16 = reshape(shape = var_1649, x = hidden_states_11_cast_fp16)[name = string("K_expanded_3_cast_fp16")]; tensor hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor([2])]; tensor hidden_states_13_cast_fp16 = expand_dims(axes = hidden_states_13_axes_0, x = V_new_3_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; tensor var_1658 = const()[name = string("op_1658"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_15_cast_fp16 = tile(reps = var_1658, x = hidden_states_13_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; tensor var_1664 = const()[name = string("op_1664"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_3_cast_fp16 = reshape(shape = var_1664, x = hidden_states_15_cast_fp16)[name = string("V_expanded_3_cast_fp16")]; bool var_1679_transpose_x_1 = const()[name = string("op_1679_transpose_x_1"), val = bool(false)]; bool var_1679_transpose_y_1 = const()[name = string("op_1679_transpose_y_1"), val = bool(true)]; tensor var_1679_cast_fp16 = matmul(transpose_x = var_1679_transpose_x_1, transpose_y = var_1679_transpose_y_1, x = q_11, y = K_expanded_3_cast_fp16)[name = string("op_1679_cast_fp16")]; fp16 var_1680_to_fp16 = const()[name = string("op_1680_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_7_cast_fp16 = mul(x = var_1679_cast_fp16, y = var_1680_to_fp16)[name = string("attn_weights_7_cast_fp16")]; tensor attn_weights_9_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = causal_mask)[name = string("attn_weights_9_cast_fp16")]; int32 var_1689 = const()[name = string("op_1689"), val = int32(-1)]; tensor var_1691_cast_fp16 = softmax(axis = var_1689, x = attn_weights_9_cast_fp16)[name = string("op_1691_cast_fp16")]; bool var_1707_transpose_x_0 = const()[name = string("op_1707_transpose_x_0"), val = bool(false)]; bool var_1707_transpose_y_0 = const()[name = string("op_1707_transpose_y_0"), val = bool(false)]; tensor var_1707_cast_fp16 = matmul(transpose_x = var_1707_transpose_x_0, transpose_y = var_1707_transpose_y_0, x = var_1691_cast_fp16, y = V_expanded_3_cast_fp16)[name = string("op_1707_cast_fp16")]; tensor var_1717 = const()[name = string("op_1717"), val = tensor([0, 2, 1, 3])]; tensor var_1724 = const()[name = string("op_1724"), val = tensor([1, 32, 1024])]; tensor var_1718 = transpose(perm = var_1717, x = var_1707_cast_fp16)[name = string("transpose_134")]; tensor attn_output_9 = reshape(shape = var_1724, x = var_1718)[name = string("attn_output_9")]; tensor var_1729 = const()[name = string("op_1729"), val = tensor([0, 2, 1])]; tensor squeeze_1_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261903232))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262558656))))[name = string("squeeze_1_quantized")]; string var_1745_pad_type_0 = const()[name = string("op_1745_pad_type_0"), val = string("valid")]; int32 var_1745_groups_0 = const()[name = string("op_1745_groups_0"), val = int32(1)]; tensor var_1745_strides_0 = const()[name = string("op_1745_strides_0"), val = tensor([1])]; tensor var_1745_pad_0 = const()[name = string("op_1745_pad_0"), val = tensor([0, 0])]; tensor var_1745_dilations_0 = const()[name = string("op_1745_dilations_0"), val = tensor([1])]; tensor var_1730 = transpose(perm = var_1729, x = attn_output_9)[name = string("transpose_133")]; tensor var_1745 = conv(dilations = var_1745_dilations_0, groups = var_1745_groups_0, pad = var_1745_pad_0, pad_type = var_1745_pad_type_0, strides = var_1745_strides_0, weight = squeeze_1_quantized, x = var_1730)[name = string("op_1745")]; tensor var_1749 = const()[name = string("op_1749"), val = tensor([0, 2, 1])]; int32 var_1756 = const()[name = string("op_1756"), val = int32(-1)]; fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_25 = transpose(perm = var_1749, x = var_1745)[name = string("transpose_132")]; tensor var_1762_cast_fp16 = mul(x = x_25, y = const_22_promoted_to_fp16)[name = string("op_1762_cast_fp16")]; bool input_31_interleave_0 = const()[name = string("input_31_interleave_0"), val = bool(false)]; tensor input_31_cast_fp16 = concat(axis = var_1756, interleave = input_31_interleave_0, values = (x_25, var_1762_cast_fp16))[name = string("input_31_cast_fp16")]; tensor normed_43_axes_0 = const()[name = string("normed_43_axes_0"), val = tensor([-1])]; fp16 var_1754_to_fp16 = const()[name = string("op_1754_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_43_cast_fp16 = layer_norm(axes = normed_43_axes_0, epsilon = var_1754_to_fp16, x = input_31_cast_fp16)[name = string("normed_43_cast_fp16")]; tensor var_1767_split_sizes_0 = const()[name = string("op_1767_split_sizes_0"), val = tensor([640, 640])]; int32 var_1767_axis_0 = const()[name = string("op_1767_axis_0"), val = int32(-1)]; tensor var_1767_cast_fp16_0, tensor var_1767_cast_fp16_1 = split(axis = var_1767_axis_0, split_sizes = var_1767_split_sizes_0, x = normed_43_cast_fp16)[name = string("op_1767_cast_fp16")]; tensor var_1771_to_fp16 = const()[name = string("op_1771_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262560000)))]; tensor out_19_cast_fp16 = mul(x = var_1767_cast_fp16_0, y = var_1771_to_fp16)[name = string("out_19_cast_fp16")]; tensor x_27_cast_fp16 = add(x = x_17_cast_fp16, y = out_19_cast_fp16)[name = string("x_27_cast_fp16")]; int32 var_1785 = const()[name = string("op_1785"), val = int32(-1)]; fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1791_cast_fp16 = mul(x = x_27_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_1791_cast_fp16")]; bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; tensor input_33_cast_fp16 = concat(axis = var_1785, interleave = input_33_interleave_0, values = (x_27_cast_fp16, var_1791_cast_fp16))[name = string("input_33_cast_fp16")]; tensor normed_47_axes_0 = const()[name = string("normed_47_axes_0"), val = tensor([-1])]; fp16 var_1783_to_fp16 = const()[name = string("op_1783_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_47_cast_fp16 = layer_norm(axes = normed_47_axes_0, epsilon = var_1783_to_fp16, x = input_33_cast_fp16)[name = string("normed_47_cast_fp16")]; tensor var_1796_split_sizes_0 = const()[name = string("op_1796_split_sizes_0"), val = tensor([640, 640])]; int32 var_1796_axis_0 = const()[name = string("op_1796_axis_0"), val = int32(-1)]; tensor var_1796_cast_fp16_0, tensor var_1796_cast_fp16_1 = split(axis = var_1796_axis_0, split_sizes = var_1796_split_sizes_0, x = normed_47_cast_fp16)[name = string("op_1796_cast_fp16")]; tensor var_1800_to_fp16 = const()[name = string("op_1800_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262561344)))]; tensor out_21_cast_fp16 = mul(x = var_1796_cast_fp16_0, y = var_1800_to_fp16)[name = string("out_21_cast_fp16")]; tensor var_1814 = const()[name = string("op_1814"), val = tensor([0, 2, 1])]; tensor input_35_axes_0 = const()[name = string("input_35_axes_0"), val = tensor([2])]; tensor var_1815 = transpose(perm = var_1814, x = out_21_cast_fp16)[name = string("transpose_131")]; tensor input_35 = expand_dims(axes = input_35_axes_0, x = var_1815)[name = string("input_35")]; string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")]; tensor gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor([1, 1])]; tensor gate_5_pad_0 = const()[name = string("gate_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_5_dilations_0 = const()[name = string("gate_5_dilations_0"), val = tensor([1, 1])]; int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)]; tensor gate_5 = conv(dilations = gate_5_dilations_0, groups = gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = layers_1_mlp_gate_proj_weight_quantized, x = input_35)[name = string("gate_5")]; string up_3_pad_type_0 = const()[name = string("up_3_pad_type_0"), val = string("valid")]; tensor up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor([1, 1])]; tensor up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor([1, 1])]; int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)]; tensor up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, weight = layers_1_mlp_up_proj_weight_quantized, x = input_35)[name = string("up_3")]; string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")]; tensor input_37 = mul(x = gate_7, y = up_3)[name = string("input_37")]; string var_1853_pad_type_0 = const()[name = string("op_1853_pad_type_0"), val = string("valid")]; tensor var_1853_strides_0 = const()[name = string("op_1853_strides_0"), val = tensor([1, 1])]; tensor var_1853_pad_0 = const()[name = string("op_1853_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1853_dilations_0 = const()[name = string("op_1853_dilations_0"), val = tensor([1, 1])]; int32 var_1853_groups_0 = const()[name = string("op_1853_groups_0"), val = int32(1)]; tensor var_1853 = conv(dilations = var_1853_dilations_0, groups = var_1853_groups_0, pad = var_1853_pad_0, pad_type = var_1853_pad_type_0, strides = var_1853_strides_0, weight = layers_1_mlp_down_proj_weight_quantized, x = input_37)[name = string("op_1853")]; tensor var_1855_axes_0 = const()[name = string("op_1855_axes_0"), val = tensor([2])]; tensor var_1855 = squeeze(axes = var_1855_axes_0, x = var_1853)[name = string("op_1855")]; tensor var_1859 = const()[name = string("op_1859"), val = tensor([0, 2, 1])]; int32 var_1866 = const()[name = string("op_1866"), val = int32(-1)]; fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_31 = transpose(perm = var_1859, x = var_1855)[name = string("transpose_130")]; tensor var_1872_cast_fp16 = mul(x = x_31, y = const_26_promoted_to_fp16)[name = string("op_1872_cast_fp16")]; bool input_39_interleave_0 = const()[name = string("input_39_interleave_0"), val = bool(false)]; tensor input_39_cast_fp16 = concat(axis = var_1866, interleave = input_39_interleave_0, values = (x_31, var_1872_cast_fp16))[name = string("input_39_cast_fp16")]; tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; fp16 var_1864_to_fp16 = const()[name = string("op_1864_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_1864_to_fp16, x = input_39_cast_fp16)[name = string("normed_53_cast_fp16")]; tensor var_1877_split_sizes_0 = const()[name = string("op_1877_split_sizes_0"), val = tensor([640, 640])]; int32 var_1877_axis_0 = const()[name = string("op_1877_axis_0"), val = int32(-1)]; tensor var_1877_cast_fp16_0, tensor var_1877_cast_fp16_1 = split(axis = var_1877_axis_0, split_sizes = var_1877_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_1877_cast_fp16")]; tensor var_1881_to_fp16 = const()[name = string("op_1881_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262562688)))]; tensor out_23_cast_fp16 = mul(x = var_1877_cast_fp16_0, y = var_1881_to_fp16)[name = string("out_23_cast_fp16")]; tensor x_33_cast_fp16 = add(x = x_27_cast_fp16, y = out_23_cast_fp16)[name = string("x_33_cast_fp16")]; int32 var_1895 = const()[name = string("op_1895"), val = int32(-1)]; fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1901_cast_fp16 = mul(x = x_33_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_1901_cast_fp16")]; bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; tensor input_41_cast_fp16 = concat(axis = var_1895, interleave = input_41_interleave_0, values = (x_33_cast_fp16, var_1901_cast_fp16))[name = string("input_41_cast_fp16")]; tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; fp16 var_1893_to_fp16 = const()[name = string("op_1893_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_1893_to_fp16, x = input_41_cast_fp16)[name = string("normed_57_cast_fp16")]; tensor var_1906_split_sizes_0 = const()[name = string("op_1906_split_sizes_0"), val = tensor([640, 640])]; int32 var_1906_axis_0 = const()[name = string("op_1906_axis_0"), val = int32(-1)]; tensor var_1906_cast_fp16_0, tensor var_1906_cast_fp16_1 = split(axis = var_1906_axis_0, split_sizes = var_1906_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_1906_cast_fp16")]; tensor var_1910_to_fp16 = const()[name = string("op_1910_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262564032)))]; tensor out_25_cast_fp16 = mul(x = var_1906_cast_fp16_0, y = var_1910_to_fp16)[name = string("out_25_cast_fp16")]; tensor var_1924 = const()[name = string("op_1924"), val = tensor([0, 2, 1])]; tensor input_43_axes_0 = const()[name = string("input_43_axes_0"), val = tensor([2])]; tensor var_1925 = transpose(perm = var_1924, x = out_25_cast_fp16)[name = string("transpose_129")]; tensor input_43 = expand_dims(axes = input_43_axes_0, x = var_1925)[name = string("input_43")]; string var_1938_pad_type_0 = const()[name = string("op_1938_pad_type_0"), val = string("valid")]; tensor var_1938_strides_0 = const()[name = string("op_1938_strides_0"), val = tensor([1, 1])]; tensor var_1938_pad_0 = const()[name = string("op_1938_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1938_dilations_0 = const()[name = string("op_1938_dilations_0"), val = tensor([1, 1])]; int32 var_1938_groups_0 = const()[name = string("op_1938_groups_0"), val = int32(1)]; tensor var_1938 = conv(dilations = var_1938_dilations_0, groups = var_1938_groups_0, pad = var_1938_pad_0, pad_type = var_1938_pad_type_0, strides = var_1938_strides_0, weight = layers_2_self_attn_q_proj_weight_quantized, x = input_43)[name = string("op_1938")]; tensor var_1943 = const()[name = string("op_1943"), val = tensor([1, 4, 256, 32])]; tensor var_1944 = reshape(shape = var_1943, x = var_1938)[name = string("op_1944")]; tensor var_1949 = const()[name = string("op_1949"), val = tensor([0, 1, 3, 2])]; int32 var_1962 = const()[name = string("op_1962"), val = int32(-1)]; fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_13 = transpose(perm = var_1949, x = var_1944)[name = string("transpose_128")]; tensor var_1968_cast_fp16 = mul(x = q_13, y = const_30_promoted_to_fp16)[name = string("op_1968_cast_fp16")]; bool input_45_interleave_0 = const()[name = string("input_45_interleave_0"), val = bool(false)]; tensor input_45_cast_fp16 = concat(axis = var_1962, interleave = input_45_interleave_0, values = (q_13, var_1968_cast_fp16))[name = string("input_45_cast_fp16")]; tensor normed_63_axes_0 = const()[name = string("normed_63_axes_0"), val = tensor([-1])]; fp16 var_1960_to_fp16 = const()[name = string("op_1960_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_63_cast_fp16 = layer_norm(axes = normed_63_axes_0, epsilon = var_1960_to_fp16, x = input_45_cast_fp16)[name = string("normed_63_cast_fp16")]; tensor var_1973_split_sizes_0 = const()[name = string("op_1973_split_sizes_0"), val = tensor([256, 256])]; int32 var_1973_axis_0 = const()[name = string("op_1973_axis_0"), val = int32(-1)]; tensor var_1973_cast_fp16_0, tensor var_1973_cast_fp16_1 = split(axis = var_1973_axis_0, split_sizes = var_1973_split_sizes_0, x = normed_63_cast_fp16)[name = string("op_1973_cast_fp16")]; tensor var_1977_to_fp16 = const()[name = string("op_1977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262565376)))]; tensor out_27_cast_fp16 = mul(x = var_1973_cast_fp16_0, y = var_1977_to_fp16)[name = string("out_27_cast_fp16")]; string var_1990_pad_type_0 = const()[name = string("op_1990_pad_type_0"), val = string("valid")]; tensor var_1990_strides_0 = const()[name = string("op_1990_strides_0"), val = tensor([1, 1])]; tensor var_1990_pad_0 = const()[name = string("op_1990_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1990_dilations_0 = const()[name = string("op_1990_dilations_0"), val = tensor([1, 1])]; int32 var_1990_groups_0 = const()[name = string("op_1990_groups_0"), val = int32(1)]; tensor var_1990 = conv(dilations = var_1990_dilations_0, groups = var_1990_groups_0, pad = var_1990_pad_0, pad_type = var_1990_pad_type_0, strides = var_1990_strides_0, weight = layers_2_self_attn_k_proj_weight_quantized, x = input_43)[name = string("op_1990")]; tensor var_1995 = const()[name = string("op_1995"), val = tensor([1, 1, 256, 32])]; tensor var_1996 = reshape(shape = var_1995, x = var_1990)[name = string("op_1996")]; tensor var_2001 = const()[name = string("op_2001"), val = tensor([0, 1, 3, 2])]; int32 var_2014 = const()[name = string("op_2014"), val = int32(-1)]; fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_13 = transpose(perm = var_2001, x = var_1996)[name = string("transpose_127")]; tensor var_2020_cast_fp16 = mul(x = k_13, y = const_32_promoted_to_fp16)[name = string("op_2020_cast_fp16")]; bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; tensor input_47_cast_fp16 = concat(axis = var_2014, interleave = input_47_interleave_0, values = (k_13, var_2020_cast_fp16))[name = string("input_47_cast_fp16")]; tensor normed_67_axes_0 = const()[name = string("normed_67_axes_0"), val = tensor([-1])]; fp16 var_2012_to_fp16 = const()[name = string("op_2012_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_67_cast_fp16 = layer_norm(axes = normed_67_axes_0, epsilon = var_2012_to_fp16, x = input_47_cast_fp16)[name = string("normed_67_cast_fp16")]; tensor var_2025_split_sizes_0 = const()[name = string("op_2025_split_sizes_0"), val = tensor([256, 256])]; int32 var_2025_axis_0 = const()[name = string("op_2025_axis_0"), val = int32(-1)]; tensor var_2025_cast_fp16_0, tensor var_2025_cast_fp16_1 = split(axis = var_2025_axis_0, split_sizes = var_2025_split_sizes_0, x = normed_67_cast_fp16)[name = string("op_2025_cast_fp16")]; tensor var_2029_to_fp16 = const()[name = string("op_2029_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262565952)))]; tensor out_29_cast_fp16 = mul(x = var_2025_cast_fp16_0, y = var_2029_to_fp16)[name = string("out_29_cast_fp16")]; string var_2042_pad_type_0 = const()[name = string("op_2042_pad_type_0"), val = string("valid")]; tensor var_2042_strides_0 = const()[name = string("op_2042_strides_0"), val = tensor([1, 1])]; tensor var_2042_pad_0 = const()[name = string("op_2042_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2042_dilations_0 = const()[name = string("op_2042_dilations_0"), val = tensor([1, 1])]; int32 var_2042_groups_0 = const()[name = string("op_2042_groups_0"), val = int32(1)]; tensor var_2042 = conv(dilations = var_2042_dilations_0, groups = var_2042_groups_0, pad = var_2042_pad_0, pad_type = var_2042_pad_type_0, strides = var_2042_strides_0, weight = layers_2_self_attn_v_proj_weight_quantized, x = input_43)[name = string("op_2042")]; tensor var_2047 = const()[name = string("op_2047"), val = tensor([1, 1, 256, 32])]; tensor var_2048 = reshape(shape = var_2047, x = var_2042)[name = string("op_2048")]; tensor var_2055 = mul(x = out_27_cast_fp16, y = cos_1)[name = string("op_2055")]; tensor var_2056_split_sizes_0 = const()[name = string("op_2056_split_sizes_0"), val = tensor([128, 128])]; int32 var_2056_axis_0 = const()[name = string("op_2056_axis_0"), val = int32(-1)]; tensor var_2056_0, tensor var_2056_1 = split(axis = var_2056_axis_0, split_sizes = var_2056_split_sizes_0, x = out_27_cast_fp16)[name = string("op_2056")]; fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)]; tensor var_2058 = mul(x = var_2056_1, y = const_34_promoted)[name = string("op_2058")]; int32 var_2060 = const()[name = string("op_2060"), val = int32(-1)]; bool var_2061_interleave_0 = const()[name = string("op_2061_interleave_0"), val = bool(false)]; tensor var_2061 = concat(axis = var_2060, interleave = var_2061_interleave_0, values = (var_2058, var_2056_0))[name = string("op_2061")]; tensor var_2062 = mul(x = var_2061, y = sin_1)[name = string("op_2062")]; tensor q_17 = add(x = var_2055, y = var_2062)[name = string("q_17")]; tensor var_2065 = mul(x = out_29_cast_fp16, y = cos_1)[name = string("op_2065")]; tensor var_2066_split_sizes_0 = const()[name = string("op_2066_split_sizes_0"), val = tensor([128, 128])]; int32 var_2066_axis_0 = const()[name = string("op_2066_axis_0"), val = int32(-1)]; tensor var_2066_0, tensor var_2066_1 = split(axis = var_2066_axis_0, split_sizes = var_2066_split_sizes_0, x = out_29_cast_fp16)[name = string("op_2066")]; fp16 const_35_promoted = const()[name = string("const_35_promoted"), val = fp16(-0x1p+0)]; tensor var_2068 = mul(x = var_2066_1, y = const_35_promoted)[name = string("op_2068")]; int32 var_2070 = const()[name = string("op_2070"), val = int32(-1)]; bool var_2071_interleave_0 = const()[name = string("op_2071_interleave_0"), val = bool(false)]; tensor var_2071 = concat(axis = var_2070, interleave = var_2071_interleave_0, values = (var_2068, var_2066_0))[name = string("op_2071")]; tensor var_2072 = mul(x = var_2071, y = sin_1)[name = string("op_2072")]; tensor k_17 = add(x = var_2065, y = var_2072)[name = string("k_17")]; tensor var_2077_begin_0 = const()[name = string("op_2077_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_2077_end_0 = const()[name = string("op_2077_end_0"), val = tensor([3, 1, 2048, 256])]; tensor var_2077_end_mask_0 = const()[name = string("op_2077_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2077_squeeze_mask_0 = const()[name = string("op_2077_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_2077_cast_fp16 = slice_by_index(begin = var_2077_begin_0, end = var_2077_end_0, end_mask = var_2077_end_mask_0, squeeze_mask = var_2077_squeeze_mask_0, x = coreml_update_state_39)[name = string("op_2077_cast_fp16")]; tensor K_cache_5_axes_0 = const()[name = string("K_cache_5_axes_0"), val = tensor([0])]; tensor K_cache_5_cast_fp16 = expand_dims(axes = K_cache_5_axes_0, x = var_2077_cast_fp16)[name = string("K_cache_5_cast_fp16")]; tensor var_2082_begin_0 = const()[name = string("op_2082_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_2082_end_0 = const()[name = string("op_2082_end_0"), val = tensor([21, 1, 2048, 256])]; tensor var_2082_end_mask_0 = const()[name = string("op_2082_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2082_squeeze_mask_0 = const()[name = string("op_2082_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_2082_cast_fp16 = slice_by_index(begin = var_2082_begin_0, end = var_2082_end_0, end_mask = var_2082_end_mask_0, squeeze_mask = var_2082_squeeze_mask_0, x = coreml_update_state_39)[name = string("op_2082_cast_fp16")]; tensor V_cache_5_axes_0 = const()[name = string("V_cache_5_axes_0"), val = tensor([0])]; tensor V_cache_5_cast_fp16 = expand_dims(axes = V_cache_5_axes_0, x = var_2082_cast_fp16)[name = string("V_cache_5_cast_fp16")]; bool k_increment_5_transpose_x_0 = const()[name = string("k_increment_5_transpose_x_0"), val = bool(false)]; bool k_increment_5_transpose_y_0 = const()[name = string("k_increment_5_transpose_y_0"), val = bool(false)]; tensor k_increment_5 = matmul(transpose_x = k_increment_5_transpose_x_0, transpose_y = k_increment_5_transpose_y_0, x = update_mask, y = k_17)[name = string("k_increment_5")]; bool v_increment_5_transpose_x_1 = const()[name = string("v_increment_5_transpose_x_1"), val = bool(false)]; bool v_increment_5_transpose_y_1 = const()[name = string("v_increment_5_transpose_y_1"), val = bool(true)]; tensor v_increment_5 = matmul(transpose_x = v_increment_5_transpose_x_1, transpose_y = v_increment_5_transpose_y_1, x = update_mask, y = var_2048)[name = string("v_increment_5")]; tensor var_2100_cast_fp16 = mul(x = K_cache_5_cast_fp16, y = var_1125_cast_fp16)[name = string("op_2100_cast_fp16")]; tensor K_new_5_cast_fp16 = add(x = var_2100_cast_fp16, y = k_increment_5)[name = string("K_new_5_cast_fp16")]; tensor var_2106_cast_fp16 = mul(x = V_cache_5_cast_fp16, y = var_1125_cast_fp16)[name = string("op_2106_cast_fp16")]; tensor V_new_5_cast_fp16 = add(x = var_2106_cast_fp16, y = v_increment_5)[name = string("V_new_5_cast_fp16")]; tensor var_2110_axes_0 = const()[name = string("op_2110_axes_0"), val = tensor([0])]; tensor var_2110_cast_fp16 = squeeze(axes = var_2110_axes_0, x = K_new_5_cast_fp16)[name = string("op_2110_cast_fp16")]; tensor concat_8 = const()[name = string("concat_8"), val = tensor([2, 0, 0, 0])]; tensor concat_9 = const()[name = string("concat_9"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_8, begin_mask = kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_9, end_mask = kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_5_stride_0, update = var_2110_cast_fp16, x = coreml_update_state_39)[name = string("kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_5_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_40_write_state")]; tensor coreml_update_state_40 = read_state(input = kv_cache_0)[name = string("coreml_update_state_40")]; tensor var_2117_axes_0 = const()[name = string("op_2117_axes_0"), val = tensor([0])]; tensor var_2117_cast_fp16 = squeeze(axes = var_2117_axes_0, x = V_new_5_cast_fp16)[name = string("op_2117_cast_fp16")]; tensor concat_10 = const()[name = string("concat_10"), val = tensor([20, 0, 0, 0])]; tensor concat_11 = const()[name = string("concat_11"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_10, begin_mask = kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_11, end_mask = kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_6_stride_0, update = var_2117_cast_fp16, x = coreml_update_state_40)[name = string("kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_6_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_41_write_state")]; tensor coreml_update_state_41 = read_state(input = kv_cache_0)[name = string("coreml_update_state_41")]; tensor hidden_states_17_axes_0 = const()[name = string("hidden_states_17_axes_0"), val = tensor([2])]; tensor hidden_states_17_cast_fp16 = expand_dims(axes = hidden_states_17_axes_0, x = K_new_5_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor var_2130 = const()[name = string("op_2130"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_19_cast_fp16 = tile(reps = var_2130, x = hidden_states_17_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; tensor var_2136 = const()[name = string("op_2136"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_5_cast_fp16 = reshape(shape = var_2136, x = hidden_states_19_cast_fp16)[name = string("K_expanded_5_cast_fp16")]; tensor hidden_states_21_axes_0 = const()[name = string("hidden_states_21_axes_0"), val = tensor([2])]; tensor hidden_states_21_cast_fp16 = expand_dims(axes = hidden_states_21_axes_0, x = V_new_5_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor var_2145 = const()[name = string("op_2145"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_23_cast_fp16 = tile(reps = var_2145, x = hidden_states_21_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor var_2151 = const()[name = string("op_2151"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_5_cast_fp16 = reshape(shape = var_2151, x = hidden_states_23_cast_fp16)[name = string("V_expanded_5_cast_fp16")]; bool var_2166_transpose_x_1 = const()[name = string("op_2166_transpose_x_1"), val = bool(false)]; bool var_2166_transpose_y_1 = const()[name = string("op_2166_transpose_y_1"), val = bool(true)]; tensor var_2166_cast_fp16 = matmul(transpose_x = var_2166_transpose_x_1, transpose_y = var_2166_transpose_y_1, x = q_17, y = K_expanded_5_cast_fp16)[name = string("op_2166_cast_fp16")]; fp16 var_2167_to_fp16 = const()[name = string("op_2167_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_13_cast_fp16 = mul(x = var_2166_cast_fp16, y = var_2167_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("attn_weights_15_cast_fp16")]; int32 var_2176 = const()[name = string("op_2176"), val = int32(-1)]; tensor var_2178_cast_fp16 = softmax(axis = var_2176, x = attn_weights_15_cast_fp16)[name = string("op_2178_cast_fp16")]; bool var_2194_transpose_x_0 = const()[name = string("op_2194_transpose_x_0"), val = bool(false)]; bool var_2194_transpose_y_0 = const()[name = string("op_2194_transpose_y_0"), val = bool(false)]; tensor var_2194_cast_fp16 = matmul(transpose_x = var_2194_transpose_x_0, transpose_y = var_2194_transpose_y_0, x = var_2178_cast_fp16, y = V_expanded_5_cast_fp16)[name = string("op_2194_cast_fp16")]; tensor var_2204 = const()[name = string("op_2204"), val = tensor([0, 2, 1, 3])]; tensor var_2211 = const()[name = string("op_2211"), val = tensor([1, 32, 1024])]; tensor var_2205 = transpose(perm = var_2204, x = var_2194_cast_fp16)[name = string("transpose_126")]; tensor attn_output_15 = reshape(shape = var_2211, x = var_2205)[name = string("attn_output_15")]; tensor var_2216 = const()[name = string("op_2216"), val = tensor([0, 2, 1])]; tensor squeeze_2_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262566528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263221952))))[name = string("squeeze_2_quantized")]; string var_2232_pad_type_0 = const()[name = string("op_2232_pad_type_0"), val = string("valid")]; int32 var_2232_groups_0 = const()[name = string("op_2232_groups_0"), val = int32(1)]; tensor var_2232_strides_0 = const()[name = string("op_2232_strides_0"), val = tensor([1])]; tensor var_2232_pad_0 = const()[name = string("op_2232_pad_0"), val = tensor([0, 0])]; tensor var_2232_dilations_0 = const()[name = string("op_2232_dilations_0"), val = tensor([1])]; tensor var_2217 = transpose(perm = var_2216, x = attn_output_15)[name = string("transpose_125")]; tensor var_2232 = conv(dilations = var_2232_dilations_0, groups = var_2232_groups_0, pad = var_2232_pad_0, pad_type = var_2232_pad_type_0, strides = var_2232_strides_0, weight = squeeze_2_quantized, x = var_2217)[name = string("op_2232")]; tensor var_2236 = const()[name = string("op_2236"), val = tensor([0, 2, 1])]; int32 var_2243 = const()[name = string("op_2243"), val = int32(-1)]; fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_41 = transpose(perm = var_2236, x = var_2232)[name = string("transpose_124")]; tensor var_2249_cast_fp16 = mul(x = x_41, y = const_36_promoted_to_fp16)[name = string("op_2249_cast_fp16")]; bool input_51_interleave_0 = const()[name = string("input_51_interleave_0"), val = bool(false)]; tensor input_51_cast_fp16 = concat(axis = var_2243, interleave = input_51_interleave_0, values = (x_41, var_2249_cast_fp16))[name = string("input_51_cast_fp16")]; tensor normed_71_axes_0 = const()[name = string("normed_71_axes_0"), val = tensor([-1])]; fp16 var_2241_to_fp16 = const()[name = string("op_2241_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_71_cast_fp16 = layer_norm(axes = normed_71_axes_0, epsilon = var_2241_to_fp16, x = input_51_cast_fp16)[name = string("normed_71_cast_fp16")]; tensor var_2254_split_sizes_0 = const()[name = string("op_2254_split_sizes_0"), val = tensor([640, 640])]; int32 var_2254_axis_0 = const()[name = string("op_2254_axis_0"), val = int32(-1)]; tensor var_2254_cast_fp16_0, tensor var_2254_cast_fp16_1 = split(axis = var_2254_axis_0, split_sizes = var_2254_split_sizes_0, x = normed_71_cast_fp16)[name = string("op_2254_cast_fp16")]; tensor var_2258_to_fp16 = const()[name = string("op_2258_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263223296)))]; tensor out_31_cast_fp16 = mul(x = var_2254_cast_fp16_0, y = var_2258_to_fp16)[name = string("out_31_cast_fp16")]; tensor x_43_cast_fp16 = add(x = x_33_cast_fp16, y = out_31_cast_fp16)[name = string("x_43_cast_fp16")]; int32 var_2272 = const()[name = string("op_2272"), val = int32(-1)]; fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2278_cast_fp16 = mul(x = x_43_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2278_cast_fp16")]; bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; tensor input_53_cast_fp16 = concat(axis = var_2272, interleave = input_53_interleave_0, values = (x_43_cast_fp16, var_2278_cast_fp16))[name = string("input_53_cast_fp16")]; tensor normed_75_axes_0 = const()[name = string("normed_75_axes_0"), val = tensor([-1])]; fp16 var_2270_to_fp16 = const()[name = string("op_2270_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_75_cast_fp16 = layer_norm(axes = normed_75_axes_0, epsilon = var_2270_to_fp16, x = input_53_cast_fp16)[name = string("normed_75_cast_fp16")]; tensor var_2283_split_sizes_0 = const()[name = string("op_2283_split_sizes_0"), val = tensor([640, 640])]; int32 var_2283_axis_0 = const()[name = string("op_2283_axis_0"), val = int32(-1)]; tensor var_2283_cast_fp16_0, tensor var_2283_cast_fp16_1 = split(axis = var_2283_axis_0, split_sizes = var_2283_split_sizes_0, x = normed_75_cast_fp16)[name = string("op_2283_cast_fp16")]; tensor var_2287_to_fp16 = const()[name = string("op_2287_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263224640)))]; tensor out_33_cast_fp16 = mul(x = var_2283_cast_fp16_0, y = var_2287_to_fp16)[name = string("out_33_cast_fp16")]; tensor var_2301 = const()[name = string("op_2301"), val = tensor([0, 2, 1])]; tensor input_55_axes_0 = const()[name = string("input_55_axes_0"), val = tensor([2])]; tensor var_2302 = transpose(perm = var_2301, x = out_33_cast_fp16)[name = string("transpose_123")]; tensor input_55 = expand_dims(axes = input_55_axes_0, x = var_2302)[name = string("input_55")]; string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = string("valid")]; tensor gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor([1, 1])]; tensor gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_9_dilations_0 = const()[name = string("gate_9_dilations_0"), val = tensor([1, 1])]; int32 gate_9_groups_0 = const()[name = string("gate_9_groups_0"), val = int32(1)]; tensor gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = layers_2_mlp_gate_proj_weight_quantized, x = input_55)[name = string("gate_9")]; string up_5_pad_type_0 = const()[name = string("up_5_pad_type_0"), val = string("valid")]; tensor up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor([1, 1])]; tensor up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor([1, 1])]; int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)]; tensor up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = layers_2_mlp_up_proj_weight_quantized, x = input_55)[name = string("up_5")]; string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")]; tensor input_57 = mul(x = gate_11, y = up_5)[name = string("input_57")]; string var_2340_pad_type_0 = const()[name = string("op_2340_pad_type_0"), val = string("valid")]; tensor var_2340_strides_0 = const()[name = string("op_2340_strides_0"), val = tensor([1, 1])]; tensor var_2340_pad_0 = const()[name = string("op_2340_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2340_dilations_0 = const()[name = string("op_2340_dilations_0"), val = tensor([1, 1])]; int32 var_2340_groups_0 = const()[name = string("op_2340_groups_0"), val = int32(1)]; tensor var_2340 = conv(dilations = var_2340_dilations_0, groups = var_2340_groups_0, pad = var_2340_pad_0, pad_type = var_2340_pad_type_0, strides = var_2340_strides_0, weight = layers_2_mlp_down_proj_weight_quantized, x = input_57)[name = string("op_2340")]; tensor var_2342_axes_0 = const()[name = string("op_2342_axes_0"), val = tensor([2])]; tensor var_2342 = squeeze(axes = var_2342_axes_0, x = var_2340)[name = string("op_2342")]; tensor var_2346 = const()[name = string("op_2346"), val = tensor([0, 2, 1])]; int32 var_2353 = const()[name = string("op_2353"), val = int32(-1)]; fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_47 = transpose(perm = var_2346, x = var_2342)[name = string("transpose_122")]; tensor var_2359_cast_fp16 = mul(x = x_47, y = const_40_promoted_to_fp16)[name = string("op_2359_cast_fp16")]; bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; tensor input_59_cast_fp16 = concat(axis = var_2353, interleave = input_59_interleave_0, values = (x_47, var_2359_cast_fp16))[name = string("input_59_cast_fp16")]; tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; fp16 var_2351_to_fp16 = const()[name = string("op_2351_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_2351_to_fp16, x = input_59_cast_fp16)[name = string("normed_81_cast_fp16")]; tensor var_2364_split_sizes_0 = const()[name = string("op_2364_split_sizes_0"), val = tensor([640, 640])]; int32 var_2364_axis_0 = const()[name = string("op_2364_axis_0"), val = int32(-1)]; tensor var_2364_cast_fp16_0, tensor var_2364_cast_fp16_1 = split(axis = var_2364_axis_0, split_sizes = var_2364_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_2364_cast_fp16")]; tensor var_2368_to_fp16 = const()[name = string("op_2368_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263225984)))]; tensor out_35_cast_fp16 = mul(x = var_2364_cast_fp16_0, y = var_2368_to_fp16)[name = string("out_35_cast_fp16")]; tensor x_49_cast_fp16 = add(x = x_43_cast_fp16, y = out_35_cast_fp16)[name = string("x_49_cast_fp16")]; int32 var_2382 = const()[name = string("op_2382"), val = int32(-1)]; fp16 const_42_promoted_to_fp16 = const()[name = string("const_42_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2388_cast_fp16 = mul(x = x_49_cast_fp16, y = const_42_promoted_to_fp16)[name = string("op_2388_cast_fp16")]; bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; tensor input_61_cast_fp16 = concat(axis = var_2382, interleave = input_61_interleave_0, values = (x_49_cast_fp16, var_2388_cast_fp16))[name = string("input_61_cast_fp16")]; tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; fp16 var_2380_to_fp16 = const()[name = string("op_2380_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_2380_to_fp16, x = input_61_cast_fp16)[name = string("normed_85_cast_fp16")]; tensor var_2393_split_sizes_0 = const()[name = string("op_2393_split_sizes_0"), val = tensor([640, 640])]; int32 var_2393_axis_0 = const()[name = string("op_2393_axis_0"), val = int32(-1)]; tensor var_2393_cast_fp16_0, tensor var_2393_cast_fp16_1 = split(axis = var_2393_axis_0, split_sizes = var_2393_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_2393_cast_fp16")]; tensor var_2397_to_fp16 = const()[name = string("op_2397_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263227328)))]; tensor out_37_cast_fp16 = mul(x = var_2393_cast_fp16_0, y = var_2397_to_fp16)[name = string("out_37_cast_fp16")]; tensor var_2411 = const()[name = string("op_2411"), val = tensor([0, 2, 1])]; tensor input_63_axes_0 = const()[name = string("input_63_axes_0"), val = tensor([2])]; tensor var_2412 = transpose(perm = var_2411, x = out_37_cast_fp16)[name = string("transpose_121")]; tensor input_63 = expand_dims(axes = input_63_axes_0, x = var_2412)[name = string("input_63")]; string var_2425_pad_type_0 = const()[name = string("op_2425_pad_type_0"), val = string("valid")]; tensor var_2425_strides_0 = const()[name = string("op_2425_strides_0"), val = tensor([1, 1])]; tensor var_2425_pad_0 = const()[name = string("op_2425_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2425_dilations_0 = const()[name = string("op_2425_dilations_0"), val = tensor([1, 1])]; int32 var_2425_groups_0 = const()[name = string("op_2425_groups_0"), val = int32(1)]; tensor var_2425 = conv(dilations = var_2425_dilations_0, groups = var_2425_groups_0, pad = var_2425_pad_0, pad_type = var_2425_pad_type_0, strides = var_2425_strides_0, weight = layers_3_self_attn_q_proj_weight_quantized, x = input_63)[name = string("op_2425")]; tensor var_2430 = const()[name = string("op_2430"), val = tensor([1, 4, 256, 32])]; tensor var_2431 = reshape(shape = var_2430, x = var_2425)[name = string("op_2431")]; tensor var_2436 = const()[name = string("op_2436"), val = tensor([0, 1, 3, 2])]; int32 var_2449 = const()[name = string("op_2449"), val = int32(-1)]; fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_19 = transpose(perm = var_2436, x = var_2431)[name = string("transpose_120")]; tensor var_2455_cast_fp16 = mul(x = q_19, y = const_44_promoted_to_fp16)[name = string("op_2455_cast_fp16")]; bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; tensor input_65_cast_fp16 = concat(axis = var_2449, interleave = input_65_interleave_0, values = (q_19, var_2455_cast_fp16))[name = string("input_65_cast_fp16")]; tensor normed_91_axes_0 = const()[name = string("normed_91_axes_0"), val = tensor([-1])]; fp16 var_2447_to_fp16 = const()[name = string("op_2447_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_91_cast_fp16 = layer_norm(axes = normed_91_axes_0, epsilon = var_2447_to_fp16, x = input_65_cast_fp16)[name = string("normed_91_cast_fp16")]; tensor var_2460_split_sizes_0 = const()[name = string("op_2460_split_sizes_0"), val = tensor([256, 256])]; int32 var_2460_axis_0 = const()[name = string("op_2460_axis_0"), val = int32(-1)]; tensor var_2460_cast_fp16_0, tensor var_2460_cast_fp16_1 = split(axis = var_2460_axis_0, split_sizes = var_2460_split_sizes_0, x = normed_91_cast_fp16)[name = string("op_2460_cast_fp16")]; tensor var_2464_to_fp16 = const()[name = string("op_2464_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263228672)))]; tensor out_39_cast_fp16 = mul(x = var_2460_cast_fp16_0, y = var_2464_to_fp16)[name = string("out_39_cast_fp16")]; string var_2477_pad_type_0 = const()[name = string("op_2477_pad_type_0"), val = string("valid")]; tensor var_2477_strides_0 = const()[name = string("op_2477_strides_0"), val = tensor([1, 1])]; tensor var_2477_pad_0 = const()[name = string("op_2477_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2477_dilations_0 = const()[name = string("op_2477_dilations_0"), val = tensor([1, 1])]; int32 var_2477_groups_0 = const()[name = string("op_2477_groups_0"), val = int32(1)]; tensor var_2477 = conv(dilations = var_2477_dilations_0, groups = var_2477_groups_0, pad = var_2477_pad_0, pad_type = var_2477_pad_type_0, strides = var_2477_strides_0, weight = layers_3_self_attn_k_proj_weight_quantized, x = input_63)[name = string("op_2477")]; tensor var_2482 = const()[name = string("op_2482"), val = tensor([1, 1, 256, 32])]; tensor var_2483 = reshape(shape = var_2482, x = var_2477)[name = string("op_2483")]; tensor var_2488 = const()[name = string("op_2488"), val = tensor([0, 1, 3, 2])]; int32 var_2501 = const()[name = string("op_2501"), val = int32(-1)]; fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_19 = transpose(perm = var_2488, x = var_2483)[name = string("transpose_119")]; tensor var_2507_cast_fp16 = mul(x = k_19, y = const_46_promoted_to_fp16)[name = string("op_2507_cast_fp16")]; bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)]; tensor input_67_cast_fp16 = concat(axis = var_2501, interleave = input_67_interleave_0, values = (k_19, var_2507_cast_fp16))[name = string("input_67_cast_fp16")]; tensor normed_95_axes_0 = const()[name = string("normed_95_axes_0"), val = tensor([-1])]; fp16 var_2499_to_fp16 = const()[name = string("op_2499_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_95_cast_fp16 = layer_norm(axes = normed_95_axes_0, epsilon = var_2499_to_fp16, x = input_67_cast_fp16)[name = string("normed_95_cast_fp16")]; tensor var_2512_split_sizes_0 = const()[name = string("op_2512_split_sizes_0"), val = tensor([256, 256])]; int32 var_2512_axis_0 = const()[name = string("op_2512_axis_0"), val = int32(-1)]; tensor var_2512_cast_fp16_0, tensor var_2512_cast_fp16_1 = split(axis = var_2512_axis_0, split_sizes = var_2512_split_sizes_0, x = normed_95_cast_fp16)[name = string("op_2512_cast_fp16")]; tensor var_2516_to_fp16 = const()[name = string("op_2516_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263229248)))]; tensor out_41_cast_fp16 = mul(x = var_2512_cast_fp16_0, y = var_2516_to_fp16)[name = string("out_41_cast_fp16")]; string var_2529_pad_type_0 = const()[name = string("op_2529_pad_type_0"), val = string("valid")]; tensor var_2529_strides_0 = const()[name = string("op_2529_strides_0"), val = tensor([1, 1])]; tensor var_2529_pad_0 = const()[name = string("op_2529_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2529_dilations_0 = const()[name = string("op_2529_dilations_0"), val = tensor([1, 1])]; int32 var_2529_groups_0 = const()[name = string("op_2529_groups_0"), val = int32(1)]; tensor var_2529 = conv(dilations = var_2529_dilations_0, groups = var_2529_groups_0, pad = var_2529_pad_0, pad_type = var_2529_pad_type_0, strides = var_2529_strides_0, weight = layers_3_self_attn_v_proj_weight_quantized, x = input_63)[name = string("op_2529")]; tensor var_2534 = const()[name = string("op_2534"), val = tensor([1, 1, 256, 32])]; tensor var_2535 = reshape(shape = var_2534, x = var_2529)[name = string("op_2535")]; tensor var_2542 = mul(x = out_39_cast_fp16, y = cos_1)[name = string("op_2542")]; tensor var_2543_split_sizes_0 = const()[name = string("op_2543_split_sizes_0"), val = tensor([128, 128])]; int32 var_2543_axis_0 = const()[name = string("op_2543_axis_0"), val = int32(-1)]; tensor var_2543_0, tensor var_2543_1 = split(axis = var_2543_axis_0, split_sizes = var_2543_split_sizes_0, x = out_39_cast_fp16)[name = string("op_2543")]; fp16 const_48_promoted = const()[name = string("const_48_promoted"), val = fp16(-0x1p+0)]; tensor var_2545 = mul(x = var_2543_1, y = const_48_promoted)[name = string("op_2545")]; int32 var_2547 = const()[name = string("op_2547"), val = int32(-1)]; bool var_2548_interleave_0 = const()[name = string("op_2548_interleave_0"), val = bool(false)]; tensor var_2548 = concat(axis = var_2547, interleave = var_2548_interleave_0, values = (var_2545, var_2543_0))[name = string("op_2548")]; tensor var_2549 = mul(x = var_2548, y = sin_1)[name = string("op_2549")]; tensor q_23 = add(x = var_2542, y = var_2549)[name = string("q_23")]; tensor var_2552 = mul(x = out_41_cast_fp16, y = cos_1)[name = string("op_2552")]; tensor var_2553_split_sizes_0 = const()[name = string("op_2553_split_sizes_0"), val = tensor([128, 128])]; int32 var_2553_axis_0 = const()[name = string("op_2553_axis_0"), val = int32(-1)]; tensor var_2553_0, tensor var_2553_1 = split(axis = var_2553_axis_0, split_sizes = var_2553_split_sizes_0, x = out_41_cast_fp16)[name = string("op_2553")]; fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)]; tensor var_2555 = mul(x = var_2553_1, y = const_49_promoted)[name = string("op_2555")]; int32 var_2557 = const()[name = string("op_2557"), val = int32(-1)]; bool var_2558_interleave_0 = const()[name = string("op_2558_interleave_0"), val = bool(false)]; tensor var_2558 = concat(axis = var_2557, interleave = var_2558_interleave_0, values = (var_2555, var_2553_0))[name = string("op_2558")]; tensor var_2559 = mul(x = var_2558, y = sin_1)[name = string("op_2559")]; tensor k_23 = add(x = var_2552, y = var_2559)[name = string("k_23")]; tensor var_2564_begin_0 = const()[name = string("op_2564_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_2564_end_0 = const()[name = string("op_2564_end_0"), val = tensor([4, 1, 2048, 256])]; tensor var_2564_end_mask_0 = const()[name = string("op_2564_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2564_squeeze_mask_0 = const()[name = string("op_2564_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_2564_cast_fp16 = slice_by_index(begin = var_2564_begin_0, end = var_2564_end_0, end_mask = var_2564_end_mask_0, squeeze_mask = var_2564_squeeze_mask_0, x = coreml_update_state_41)[name = string("op_2564_cast_fp16")]; tensor K_cache_7_axes_0 = const()[name = string("K_cache_7_axes_0"), val = tensor([0])]; tensor K_cache_7_cast_fp16 = expand_dims(axes = K_cache_7_axes_0, x = var_2564_cast_fp16)[name = string("K_cache_7_cast_fp16")]; tensor var_2569_begin_0 = const()[name = string("op_2569_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_2569_end_0 = const()[name = string("op_2569_end_0"), val = tensor([22, 1, 2048, 256])]; tensor var_2569_end_mask_0 = const()[name = string("op_2569_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_2569_squeeze_mask_0 = const()[name = string("op_2569_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_2569_cast_fp16 = slice_by_index(begin = var_2569_begin_0, end = var_2569_end_0, end_mask = var_2569_end_mask_0, squeeze_mask = var_2569_squeeze_mask_0, x = coreml_update_state_41)[name = string("op_2569_cast_fp16")]; tensor V_cache_7_axes_0 = const()[name = string("V_cache_7_axes_0"), val = tensor([0])]; tensor V_cache_7_cast_fp16 = expand_dims(axes = V_cache_7_axes_0, x = var_2569_cast_fp16)[name = string("V_cache_7_cast_fp16")]; bool k_increment_7_transpose_x_0 = const()[name = string("k_increment_7_transpose_x_0"), val = bool(false)]; bool k_increment_7_transpose_y_0 = const()[name = string("k_increment_7_transpose_y_0"), val = bool(false)]; tensor k_increment_7 = matmul(transpose_x = k_increment_7_transpose_x_0, transpose_y = k_increment_7_transpose_y_0, x = update_mask, y = k_23)[name = string("k_increment_7")]; bool v_increment_7_transpose_x_1 = const()[name = string("v_increment_7_transpose_x_1"), val = bool(false)]; bool v_increment_7_transpose_y_1 = const()[name = string("v_increment_7_transpose_y_1"), val = bool(true)]; tensor v_increment_7 = matmul(transpose_x = v_increment_7_transpose_x_1, transpose_y = v_increment_7_transpose_y_1, x = update_mask, y = var_2535)[name = string("v_increment_7")]; tensor var_2587_cast_fp16 = mul(x = K_cache_7_cast_fp16, y = var_1125_cast_fp16)[name = string("op_2587_cast_fp16")]; tensor K_new_7_cast_fp16 = add(x = var_2587_cast_fp16, y = k_increment_7)[name = string("K_new_7_cast_fp16")]; tensor var_2593_cast_fp16 = mul(x = V_cache_7_cast_fp16, y = var_1125_cast_fp16)[name = string("op_2593_cast_fp16")]; tensor V_new_7_cast_fp16 = add(x = var_2593_cast_fp16, y = v_increment_7)[name = string("V_new_7_cast_fp16")]; tensor var_2597_axes_0 = const()[name = string("op_2597_axes_0"), val = tensor([0])]; tensor var_2597_cast_fp16 = squeeze(axes = var_2597_axes_0, x = K_new_7_cast_fp16)[name = string("op_2597_cast_fp16")]; tensor concat_12 = const()[name = string("concat_12"), val = tensor([3, 0, 0, 0])]; tensor concat_13 = const()[name = string("concat_13"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_12, begin_mask = kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_13, end_mask = kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_7_stride_0, update = var_2597_cast_fp16, x = coreml_update_state_41)[name = string("kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_7_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_42_write_state")]; tensor coreml_update_state_42 = read_state(input = kv_cache_0)[name = string("coreml_update_state_42")]; tensor var_2604_axes_0 = const()[name = string("op_2604_axes_0"), val = tensor([0])]; tensor var_2604_cast_fp16 = squeeze(axes = var_2604_axes_0, x = V_new_7_cast_fp16)[name = string("op_2604_cast_fp16")]; tensor concat_14 = const()[name = string("concat_14"), val = tensor([21, 0, 0, 0])]; tensor concat_15 = const()[name = string("concat_15"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_14, begin_mask = kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_15, end_mask = kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_8_stride_0, update = var_2604_cast_fp16, x = coreml_update_state_42)[name = string("kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_8_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_43_write_state")]; tensor coreml_update_state_43 = read_state(input = kv_cache_0)[name = string("coreml_update_state_43")]; tensor hidden_states_25_axes_0 = const()[name = string("hidden_states_25_axes_0"), val = tensor([2])]; tensor hidden_states_25_cast_fp16 = expand_dims(axes = hidden_states_25_axes_0, x = K_new_7_cast_fp16)[name = string("hidden_states_25_cast_fp16")]; tensor var_2617 = const()[name = string("op_2617"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_27_cast_fp16 = tile(reps = var_2617, x = hidden_states_25_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; tensor var_2623 = const()[name = string("op_2623"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_7_cast_fp16 = reshape(shape = var_2623, x = hidden_states_27_cast_fp16)[name = string("K_expanded_7_cast_fp16")]; tensor hidden_states_29_axes_0 = const()[name = string("hidden_states_29_axes_0"), val = tensor([2])]; tensor hidden_states_29_cast_fp16 = expand_dims(axes = hidden_states_29_axes_0, x = V_new_7_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor var_2632 = const()[name = string("op_2632"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_31_cast_fp16 = tile(reps = var_2632, x = hidden_states_29_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; tensor var_2638 = const()[name = string("op_2638"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_7_cast_fp16 = reshape(shape = var_2638, x = hidden_states_31_cast_fp16)[name = string("V_expanded_7_cast_fp16")]; bool var_2653_transpose_x_1 = const()[name = string("op_2653_transpose_x_1"), val = bool(false)]; bool var_2653_transpose_y_1 = const()[name = string("op_2653_transpose_y_1"), val = bool(true)]; tensor var_2653_cast_fp16 = matmul(transpose_x = var_2653_transpose_x_1, transpose_y = var_2653_transpose_y_1, x = q_23, y = K_expanded_7_cast_fp16)[name = string("op_2653_cast_fp16")]; fp16 var_2654_to_fp16 = const()[name = string("op_2654_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_19_cast_fp16 = mul(x = var_2653_cast_fp16, y = var_2654_to_fp16)[name = string("attn_weights_19_cast_fp16")]; tensor attn_weights_21_cast_fp16 = add(x = attn_weights_19_cast_fp16, y = causal_mask)[name = string("attn_weights_21_cast_fp16")]; int32 var_2663 = const()[name = string("op_2663"), val = int32(-1)]; tensor var_2665_cast_fp16 = softmax(axis = var_2663, x = attn_weights_21_cast_fp16)[name = string("op_2665_cast_fp16")]; bool var_2681_transpose_x_0 = const()[name = string("op_2681_transpose_x_0"), val = bool(false)]; bool var_2681_transpose_y_0 = const()[name = string("op_2681_transpose_y_0"), val = bool(false)]; tensor var_2681_cast_fp16 = matmul(transpose_x = var_2681_transpose_x_0, transpose_y = var_2681_transpose_y_0, x = var_2665_cast_fp16, y = V_expanded_7_cast_fp16)[name = string("op_2681_cast_fp16")]; tensor var_2691 = const()[name = string("op_2691"), val = tensor([0, 2, 1, 3])]; tensor var_2698 = const()[name = string("op_2698"), val = tensor([1, 32, 1024])]; tensor var_2692 = transpose(perm = var_2691, x = var_2681_cast_fp16)[name = string("transpose_118")]; tensor attn_output_21 = reshape(shape = var_2698, x = var_2692)[name = string("attn_output_21")]; tensor var_2703 = const()[name = string("op_2703"), val = tensor([0, 2, 1])]; tensor squeeze_3_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263229824))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263885248))))[name = string("squeeze_3_quantized")]; string var_2719_pad_type_0 = const()[name = string("op_2719_pad_type_0"), val = string("valid")]; int32 var_2719_groups_0 = const()[name = string("op_2719_groups_0"), val = int32(1)]; tensor var_2719_strides_0 = const()[name = string("op_2719_strides_0"), val = tensor([1])]; tensor var_2719_pad_0 = const()[name = string("op_2719_pad_0"), val = tensor([0, 0])]; tensor var_2719_dilations_0 = const()[name = string("op_2719_dilations_0"), val = tensor([1])]; tensor var_2704 = transpose(perm = var_2703, x = attn_output_21)[name = string("transpose_117")]; tensor var_2719 = conv(dilations = var_2719_dilations_0, groups = var_2719_groups_0, pad = var_2719_pad_0, pad_type = var_2719_pad_type_0, strides = var_2719_strides_0, weight = squeeze_3_quantized, x = var_2704)[name = string("op_2719")]; tensor var_2723 = const()[name = string("op_2723"), val = tensor([0, 2, 1])]; int32 var_2730 = const()[name = string("op_2730"), val = int32(-1)]; fp16 const_50_promoted_to_fp16 = const()[name = string("const_50_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_57 = transpose(perm = var_2723, x = var_2719)[name = string("transpose_116")]; tensor var_2736_cast_fp16 = mul(x = x_57, y = const_50_promoted_to_fp16)[name = string("op_2736_cast_fp16")]; bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; tensor input_71_cast_fp16 = concat(axis = var_2730, interleave = input_71_interleave_0, values = (x_57, var_2736_cast_fp16))[name = string("input_71_cast_fp16")]; tensor normed_99_axes_0 = const()[name = string("normed_99_axes_0"), val = tensor([-1])]; fp16 var_2728_to_fp16 = const()[name = string("op_2728_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_99_cast_fp16 = layer_norm(axes = normed_99_axes_0, epsilon = var_2728_to_fp16, x = input_71_cast_fp16)[name = string("normed_99_cast_fp16")]; tensor var_2741_split_sizes_0 = const()[name = string("op_2741_split_sizes_0"), val = tensor([640, 640])]; int32 var_2741_axis_0 = const()[name = string("op_2741_axis_0"), val = int32(-1)]; tensor var_2741_cast_fp16_0, tensor var_2741_cast_fp16_1 = split(axis = var_2741_axis_0, split_sizes = var_2741_split_sizes_0, x = normed_99_cast_fp16)[name = string("op_2741_cast_fp16")]; tensor var_2745_to_fp16 = const()[name = string("op_2745_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263886592)))]; tensor out_43_cast_fp16 = mul(x = var_2741_cast_fp16_0, y = var_2745_to_fp16)[name = string("out_43_cast_fp16")]; tensor x_59_cast_fp16 = add(x = x_49_cast_fp16, y = out_43_cast_fp16)[name = string("x_59_cast_fp16")]; int32 var_2759 = const()[name = string("op_2759"), val = int32(-1)]; fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2765_cast_fp16 = mul(x = x_59_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_2765_cast_fp16")]; bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; tensor input_73_cast_fp16 = concat(axis = var_2759, interleave = input_73_interleave_0, values = (x_59_cast_fp16, var_2765_cast_fp16))[name = string("input_73_cast_fp16")]; tensor normed_103_axes_0 = const()[name = string("normed_103_axes_0"), val = tensor([-1])]; fp16 var_2757_to_fp16 = const()[name = string("op_2757_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_103_cast_fp16 = layer_norm(axes = normed_103_axes_0, epsilon = var_2757_to_fp16, x = input_73_cast_fp16)[name = string("normed_103_cast_fp16")]; tensor var_2770_split_sizes_0 = const()[name = string("op_2770_split_sizes_0"), val = tensor([640, 640])]; int32 var_2770_axis_0 = const()[name = string("op_2770_axis_0"), val = int32(-1)]; tensor var_2770_cast_fp16_0, tensor var_2770_cast_fp16_1 = split(axis = var_2770_axis_0, split_sizes = var_2770_split_sizes_0, x = normed_103_cast_fp16)[name = string("op_2770_cast_fp16")]; tensor var_2774_to_fp16 = const()[name = string("op_2774_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263887936)))]; tensor out_45_cast_fp16 = mul(x = var_2770_cast_fp16_0, y = var_2774_to_fp16)[name = string("out_45_cast_fp16")]; tensor var_2788 = const()[name = string("op_2788"), val = tensor([0, 2, 1])]; tensor input_75_axes_0 = const()[name = string("input_75_axes_0"), val = tensor([2])]; tensor var_2789 = transpose(perm = var_2788, x = out_45_cast_fp16)[name = string("transpose_115")]; tensor input_75 = expand_dims(axes = input_75_axes_0, x = var_2789)[name = string("input_75")]; string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")]; tensor gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor([1, 1])]; tensor gate_13_pad_0 = const()[name = string("gate_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor([1, 1])]; int32 gate_13_groups_0 = const()[name = string("gate_13_groups_0"), val = int32(1)]; tensor gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = layers_3_mlp_gate_proj_weight_quantized, x = input_75)[name = string("gate_13")]; string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")]; tensor up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor([1, 1])]; tensor up_7_pad_0 = const()[name = string("up_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor([1, 1])]; int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)]; tensor up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = layers_3_mlp_up_proj_weight_quantized, x = input_75)[name = string("up_7")]; string gate_15_mode_0 = const()[name = string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")]; tensor input_77 = mul(x = gate_15, y = up_7)[name = string("input_77")]; string var_2827_pad_type_0 = const()[name = string("op_2827_pad_type_0"), val = string("valid")]; tensor var_2827_strides_0 = const()[name = string("op_2827_strides_0"), val = tensor([1, 1])]; tensor var_2827_pad_0 = const()[name = string("op_2827_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2827_dilations_0 = const()[name = string("op_2827_dilations_0"), val = tensor([1, 1])]; int32 var_2827_groups_0 = const()[name = string("op_2827_groups_0"), val = int32(1)]; tensor var_2827 = conv(dilations = var_2827_dilations_0, groups = var_2827_groups_0, pad = var_2827_pad_0, pad_type = var_2827_pad_type_0, strides = var_2827_strides_0, weight = layers_3_mlp_down_proj_weight_quantized, x = input_77)[name = string("op_2827")]; tensor var_2829_axes_0 = const()[name = string("op_2829_axes_0"), val = tensor([2])]; tensor var_2829 = squeeze(axes = var_2829_axes_0, x = var_2827)[name = string("op_2829")]; tensor var_2833 = const()[name = string("op_2833"), val = tensor([0, 2, 1])]; int32 var_2840 = const()[name = string("op_2840"), val = int32(-1)]; fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_63 = transpose(perm = var_2833, x = var_2829)[name = string("transpose_114")]; tensor var_2846_cast_fp16 = mul(x = x_63, y = const_54_promoted_to_fp16)[name = string("op_2846_cast_fp16")]; bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; tensor input_79_cast_fp16 = concat(axis = var_2840, interleave = input_79_interleave_0, values = (x_63, var_2846_cast_fp16))[name = string("input_79_cast_fp16")]; tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; fp16 var_2838_to_fp16 = const()[name = string("op_2838_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_2838_to_fp16, x = input_79_cast_fp16)[name = string("normed_109_cast_fp16")]; tensor var_2851_split_sizes_0 = const()[name = string("op_2851_split_sizes_0"), val = tensor([640, 640])]; int32 var_2851_axis_0 = const()[name = string("op_2851_axis_0"), val = int32(-1)]; tensor var_2851_cast_fp16_0, tensor var_2851_cast_fp16_1 = split(axis = var_2851_axis_0, split_sizes = var_2851_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_2851_cast_fp16")]; tensor var_2855_to_fp16 = const()[name = string("op_2855_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263889280)))]; tensor out_47_cast_fp16 = mul(x = var_2851_cast_fp16_0, y = var_2855_to_fp16)[name = string("out_47_cast_fp16")]; tensor x_65_cast_fp16 = add(x = x_59_cast_fp16, y = out_47_cast_fp16)[name = string("x_65_cast_fp16")]; int32 var_2869 = const()[name = string("op_2869"), val = int32(-1)]; fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2875_cast_fp16 = mul(x = x_65_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_2875_cast_fp16")]; bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; tensor input_81_cast_fp16 = concat(axis = var_2869, interleave = input_81_interleave_0, values = (x_65_cast_fp16, var_2875_cast_fp16))[name = string("input_81_cast_fp16")]; tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; fp16 var_2867_to_fp16 = const()[name = string("op_2867_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_2867_to_fp16, x = input_81_cast_fp16)[name = string("normed_113_cast_fp16")]; tensor var_2880_split_sizes_0 = const()[name = string("op_2880_split_sizes_0"), val = tensor([640, 640])]; int32 var_2880_axis_0 = const()[name = string("op_2880_axis_0"), val = int32(-1)]; tensor var_2880_cast_fp16_0, tensor var_2880_cast_fp16_1 = split(axis = var_2880_axis_0, split_sizes = var_2880_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_2880_cast_fp16")]; tensor var_2884_to_fp16 = const()[name = string("op_2884_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263890624)))]; tensor out_49_cast_fp16 = mul(x = var_2880_cast_fp16_0, y = var_2884_to_fp16)[name = string("out_49_cast_fp16")]; tensor var_2898 = const()[name = string("op_2898"), val = tensor([0, 2, 1])]; tensor input_83_axes_0 = const()[name = string("input_83_axes_0"), val = tensor([2])]; tensor var_2899 = transpose(perm = var_2898, x = out_49_cast_fp16)[name = string("transpose_113")]; tensor input_83 = expand_dims(axes = input_83_axes_0, x = var_2899)[name = string("input_83")]; string var_2912_pad_type_0 = const()[name = string("op_2912_pad_type_0"), val = string("valid")]; tensor var_2912_strides_0 = const()[name = string("op_2912_strides_0"), val = tensor([1, 1])]; tensor var_2912_pad_0 = const()[name = string("op_2912_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2912_dilations_0 = const()[name = string("op_2912_dilations_0"), val = tensor([1, 1])]; int32 var_2912_groups_0 = const()[name = string("op_2912_groups_0"), val = int32(1)]; tensor var_2912 = conv(dilations = var_2912_dilations_0, groups = var_2912_groups_0, pad = var_2912_pad_0, pad_type = var_2912_pad_type_0, strides = var_2912_strides_0, weight = layers_4_self_attn_q_proj_weight_quantized, x = input_83)[name = string("op_2912")]; tensor var_2917 = const()[name = string("op_2917"), val = tensor([1, 4, 256, 32])]; tensor var_2918 = reshape(shape = var_2917, x = var_2912)[name = string("op_2918")]; tensor var_2923 = const()[name = string("op_2923"), val = tensor([0, 1, 3, 2])]; int32 var_2936 = const()[name = string("op_2936"), val = int32(-1)]; fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_25 = transpose(perm = var_2923, x = var_2918)[name = string("transpose_112")]; tensor var_2942_cast_fp16 = mul(x = q_25, y = const_58_promoted_to_fp16)[name = string("op_2942_cast_fp16")]; bool input_85_interleave_0 = const()[name = string("input_85_interleave_0"), val = bool(false)]; tensor input_85_cast_fp16 = concat(axis = var_2936, interleave = input_85_interleave_0, values = (q_25, var_2942_cast_fp16))[name = string("input_85_cast_fp16")]; tensor normed_119_axes_0 = const()[name = string("normed_119_axes_0"), val = tensor([-1])]; fp16 var_2934_to_fp16 = const()[name = string("op_2934_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_119_cast_fp16 = layer_norm(axes = normed_119_axes_0, epsilon = var_2934_to_fp16, x = input_85_cast_fp16)[name = string("normed_119_cast_fp16")]; tensor var_2947_split_sizes_0 = const()[name = string("op_2947_split_sizes_0"), val = tensor([256, 256])]; int32 var_2947_axis_0 = const()[name = string("op_2947_axis_0"), val = int32(-1)]; tensor var_2947_cast_fp16_0, tensor var_2947_cast_fp16_1 = split(axis = var_2947_axis_0, split_sizes = var_2947_split_sizes_0, x = normed_119_cast_fp16)[name = string("op_2947_cast_fp16")]; tensor var_2951_to_fp16 = const()[name = string("op_2951_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263891968)))]; tensor out_51_cast_fp16 = mul(x = var_2947_cast_fp16_0, y = var_2951_to_fp16)[name = string("out_51_cast_fp16")]; string var_2964_pad_type_0 = const()[name = string("op_2964_pad_type_0"), val = string("valid")]; tensor var_2964_strides_0 = const()[name = string("op_2964_strides_0"), val = tensor([1, 1])]; tensor var_2964_pad_0 = const()[name = string("op_2964_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2964_dilations_0 = const()[name = string("op_2964_dilations_0"), val = tensor([1, 1])]; int32 var_2964_groups_0 = const()[name = string("op_2964_groups_0"), val = int32(1)]; tensor var_2964 = conv(dilations = var_2964_dilations_0, groups = var_2964_groups_0, pad = var_2964_pad_0, pad_type = var_2964_pad_type_0, strides = var_2964_strides_0, weight = layers_4_self_attn_k_proj_weight_quantized, x = input_83)[name = string("op_2964")]; tensor var_2969 = const()[name = string("op_2969"), val = tensor([1, 1, 256, 32])]; tensor var_2970 = reshape(shape = var_2969, x = var_2964)[name = string("op_2970")]; tensor var_2975 = const()[name = string("op_2975"), val = tensor([0, 1, 3, 2])]; int32 var_2988 = const()[name = string("op_2988"), val = int32(-1)]; fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_25 = transpose(perm = var_2975, x = var_2970)[name = string("transpose_111")]; tensor var_2994_cast_fp16 = mul(x = k_25, y = const_60_promoted_to_fp16)[name = string("op_2994_cast_fp16")]; bool input_87_interleave_0 = const()[name = string("input_87_interleave_0"), val = bool(false)]; tensor input_87_cast_fp16 = concat(axis = var_2988, interleave = input_87_interleave_0, values = (k_25, var_2994_cast_fp16))[name = string("input_87_cast_fp16")]; tensor normed_123_axes_0 = const()[name = string("normed_123_axes_0"), val = tensor([-1])]; fp16 var_2986_to_fp16 = const()[name = string("op_2986_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_123_cast_fp16 = layer_norm(axes = normed_123_axes_0, epsilon = var_2986_to_fp16, x = input_87_cast_fp16)[name = string("normed_123_cast_fp16")]; tensor var_2999_split_sizes_0 = const()[name = string("op_2999_split_sizes_0"), val = tensor([256, 256])]; int32 var_2999_axis_0 = const()[name = string("op_2999_axis_0"), val = int32(-1)]; tensor var_2999_cast_fp16_0, tensor var_2999_cast_fp16_1 = split(axis = var_2999_axis_0, split_sizes = var_2999_split_sizes_0, x = normed_123_cast_fp16)[name = string("op_2999_cast_fp16")]; tensor var_3003_to_fp16 = const()[name = string("op_3003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263892544)))]; tensor out_53_cast_fp16 = mul(x = var_2999_cast_fp16_0, y = var_3003_to_fp16)[name = string("out_53_cast_fp16")]; string var_3016_pad_type_0 = const()[name = string("op_3016_pad_type_0"), val = string("valid")]; tensor var_3016_strides_0 = const()[name = string("op_3016_strides_0"), val = tensor([1, 1])]; tensor var_3016_pad_0 = const()[name = string("op_3016_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3016_dilations_0 = const()[name = string("op_3016_dilations_0"), val = tensor([1, 1])]; int32 var_3016_groups_0 = const()[name = string("op_3016_groups_0"), val = int32(1)]; tensor var_3016 = conv(dilations = var_3016_dilations_0, groups = var_3016_groups_0, pad = var_3016_pad_0, pad_type = var_3016_pad_type_0, strides = var_3016_strides_0, weight = layers_4_self_attn_v_proj_weight_quantized, x = input_83)[name = string("op_3016")]; tensor var_3021 = const()[name = string("op_3021"), val = tensor([1, 1, 256, 32])]; tensor var_3022 = reshape(shape = var_3021, x = var_3016)[name = string("op_3022")]; tensor var_3029 = mul(x = out_51_cast_fp16, y = cos_1)[name = string("op_3029")]; tensor var_3030_split_sizes_0 = const()[name = string("op_3030_split_sizes_0"), val = tensor([128, 128])]; int32 var_3030_axis_0 = const()[name = string("op_3030_axis_0"), val = int32(-1)]; tensor var_3030_0, tensor var_3030_1 = split(axis = var_3030_axis_0, split_sizes = var_3030_split_sizes_0, x = out_51_cast_fp16)[name = string("op_3030")]; fp16 const_62_promoted = const()[name = string("const_62_promoted"), val = fp16(-0x1p+0)]; tensor var_3032 = mul(x = var_3030_1, y = const_62_promoted)[name = string("op_3032")]; int32 var_3034 = const()[name = string("op_3034"), val = int32(-1)]; bool var_3035_interleave_0 = const()[name = string("op_3035_interleave_0"), val = bool(false)]; tensor var_3035 = concat(axis = var_3034, interleave = var_3035_interleave_0, values = (var_3032, var_3030_0))[name = string("op_3035")]; tensor var_3036 = mul(x = var_3035, y = sin_1)[name = string("op_3036")]; tensor q_29 = add(x = var_3029, y = var_3036)[name = string("q_29")]; tensor var_3039 = mul(x = out_53_cast_fp16, y = cos_1)[name = string("op_3039")]; tensor var_3040_split_sizes_0 = const()[name = string("op_3040_split_sizes_0"), val = tensor([128, 128])]; int32 var_3040_axis_0 = const()[name = string("op_3040_axis_0"), val = int32(-1)]; tensor var_3040_0, tensor var_3040_1 = split(axis = var_3040_axis_0, split_sizes = var_3040_split_sizes_0, x = out_53_cast_fp16)[name = string("op_3040")]; fp16 const_63_promoted = const()[name = string("const_63_promoted"), val = fp16(-0x1p+0)]; tensor var_3042 = mul(x = var_3040_1, y = const_63_promoted)[name = string("op_3042")]; int32 var_3044 = const()[name = string("op_3044"), val = int32(-1)]; bool var_3045_interleave_0 = const()[name = string("op_3045_interleave_0"), val = bool(false)]; tensor var_3045 = concat(axis = var_3044, interleave = var_3045_interleave_0, values = (var_3042, var_3040_0))[name = string("op_3045")]; tensor var_3046 = mul(x = var_3045, y = sin_1)[name = string("op_3046")]; tensor k_29 = add(x = var_3039, y = var_3046)[name = string("k_29")]; tensor var_3051_begin_0 = const()[name = string("op_3051_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_3051_end_0 = const()[name = string("op_3051_end_0"), val = tensor([5, 1, 2048, 256])]; tensor var_3051_end_mask_0 = const()[name = string("op_3051_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3051_squeeze_mask_0 = const()[name = string("op_3051_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_3051_cast_fp16 = slice_by_index(begin = var_3051_begin_0, end = var_3051_end_0, end_mask = var_3051_end_mask_0, squeeze_mask = var_3051_squeeze_mask_0, x = coreml_update_state_43)[name = string("op_3051_cast_fp16")]; tensor K_cache_9_axes_0 = const()[name = string("K_cache_9_axes_0"), val = tensor([0])]; tensor K_cache_9_cast_fp16 = expand_dims(axes = K_cache_9_axes_0, x = var_3051_cast_fp16)[name = string("K_cache_9_cast_fp16")]; tensor var_3056_begin_0 = const()[name = string("op_3056_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_3056_end_0 = const()[name = string("op_3056_end_0"), val = tensor([23, 1, 2048, 256])]; tensor var_3056_end_mask_0 = const()[name = string("op_3056_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3056_squeeze_mask_0 = const()[name = string("op_3056_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_3056_cast_fp16 = slice_by_index(begin = var_3056_begin_0, end = var_3056_end_0, end_mask = var_3056_end_mask_0, squeeze_mask = var_3056_squeeze_mask_0, x = coreml_update_state_43)[name = string("op_3056_cast_fp16")]; tensor V_cache_9_axes_0 = const()[name = string("V_cache_9_axes_0"), val = tensor([0])]; tensor V_cache_9_cast_fp16 = expand_dims(axes = V_cache_9_axes_0, x = var_3056_cast_fp16)[name = string("V_cache_9_cast_fp16")]; bool k_increment_9_transpose_x_0 = const()[name = string("k_increment_9_transpose_x_0"), val = bool(false)]; bool k_increment_9_transpose_y_0 = const()[name = string("k_increment_9_transpose_y_0"), val = bool(false)]; tensor k_increment_9 = matmul(transpose_x = k_increment_9_transpose_x_0, transpose_y = k_increment_9_transpose_y_0, x = update_mask, y = k_29)[name = string("k_increment_9")]; bool v_increment_9_transpose_x_1 = const()[name = string("v_increment_9_transpose_x_1"), val = bool(false)]; bool v_increment_9_transpose_y_1 = const()[name = string("v_increment_9_transpose_y_1"), val = bool(true)]; tensor v_increment_9 = matmul(transpose_x = v_increment_9_transpose_x_1, transpose_y = v_increment_9_transpose_y_1, x = update_mask, y = var_3022)[name = string("v_increment_9")]; tensor var_3074_cast_fp16 = mul(x = K_cache_9_cast_fp16, y = var_1125_cast_fp16)[name = string("op_3074_cast_fp16")]; tensor K_new_9_cast_fp16 = add(x = var_3074_cast_fp16, y = k_increment_9)[name = string("K_new_9_cast_fp16")]; tensor var_3080_cast_fp16 = mul(x = V_cache_9_cast_fp16, y = var_1125_cast_fp16)[name = string("op_3080_cast_fp16")]; tensor V_new_9_cast_fp16 = add(x = var_3080_cast_fp16, y = v_increment_9)[name = string("V_new_9_cast_fp16")]; tensor var_3084_axes_0 = const()[name = string("op_3084_axes_0"), val = tensor([0])]; tensor var_3084_cast_fp16 = squeeze(axes = var_3084_axes_0, x = K_new_9_cast_fp16)[name = string("op_3084_cast_fp16")]; tensor concat_16 = const()[name = string("concat_16"), val = tensor([4, 0, 0, 0])]; tensor concat_17 = const()[name = string("concat_17"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_16, begin_mask = kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_17, end_mask = kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_9_stride_0, update = var_3084_cast_fp16, x = coreml_update_state_43)[name = string("kv_cache_0_internal_tensor_assign_9_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_9_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_44_write_state")]; tensor coreml_update_state_44 = read_state(input = kv_cache_0)[name = string("coreml_update_state_44")]; tensor var_3091_axes_0 = const()[name = string("op_3091_axes_0"), val = tensor([0])]; tensor var_3091_cast_fp16 = squeeze(axes = var_3091_axes_0, x = V_new_9_cast_fp16)[name = string("op_3091_cast_fp16")]; tensor concat_18 = const()[name = string("concat_18"), val = tensor([22, 0, 0, 0])]; tensor concat_19 = const()[name = string("concat_19"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_18, begin_mask = kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_19, end_mask = kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_10_stride_0, update = var_3091_cast_fp16, x = coreml_update_state_44)[name = string("kv_cache_0_internal_tensor_assign_10_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_10_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_45_write_state")]; tensor coreml_update_state_45 = read_state(input = kv_cache_0)[name = string("coreml_update_state_45")]; tensor hidden_states_33_axes_0 = const()[name = string("hidden_states_33_axes_0"), val = tensor([2])]; tensor hidden_states_33_cast_fp16 = expand_dims(axes = hidden_states_33_axes_0, x = K_new_9_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; tensor var_3104 = const()[name = string("op_3104"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_35_cast_fp16 = tile(reps = var_3104, x = hidden_states_33_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; tensor var_3110 = const()[name = string("op_3110"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_9_cast_fp16 = reshape(shape = var_3110, x = hidden_states_35_cast_fp16)[name = string("K_expanded_9_cast_fp16")]; tensor hidden_states_37_axes_0 = const()[name = string("hidden_states_37_axes_0"), val = tensor([2])]; tensor hidden_states_37_cast_fp16 = expand_dims(axes = hidden_states_37_axes_0, x = V_new_9_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; tensor var_3119 = const()[name = string("op_3119"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_39_cast_fp16 = tile(reps = var_3119, x = hidden_states_37_cast_fp16)[name = string("hidden_states_39_cast_fp16")]; tensor var_3125 = const()[name = string("op_3125"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_9_cast_fp16 = reshape(shape = var_3125, x = hidden_states_39_cast_fp16)[name = string("V_expanded_9_cast_fp16")]; bool var_3140_transpose_x_1 = const()[name = string("op_3140_transpose_x_1"), val = bool(false)]; bool var_3140_transpose_y_1 = const()[name = string("op_3140_transpose_y_1"), val = bool(true)]; tensor var_3140_cast_fp16 = matmul(transpose_x = var_3140_transpose_x_1, transpose_y = var_3140_transpose_y_1, x = q_29, y = K_expanded_9_cast_fp16)[name = string("op_3140_cast_fp16")]; fp16 var_3141_to_fp16 = const()[name = string("op_3141_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_25_cast_fp16 = mul(x = var_3140_cast_fp16, y = var_3141_to_fp16)[name = string("attn_weights_25_cast_fp16")]; tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("attn_weights_27_cast_fp16")]; int32 var_3150 = const()[name = string("op_3150"), val = int32(-1)]; tensor var_3152_cast_fp16 = softmax(axis = var_3150, x = attn_weights_27_cast_fp16)[name = string("op_3152_cast_fp16")]; bool var_3168_transpose_x_0 = const()[name = string("op_3168_transpose_x_0"), val = bool(false)]; bool var_3168_transpose_y_0 = const()[name = string("op_3168_transpose_y_0"), val = bool(false)]; tensor var_3168_cast_fp16 = matmul(transpose_x = var_3168_transpose_x_0, transpose_y = var_3168_transpose_y_0, x = var_3152_cast_fp16, y = V_expanded_9_cast_fp16)[name = string("op_3168_cast_fp16")]; tensor var_3178 = const()[name = string("op_3178"), val = tensor([0, 2, 1, 3])]; tensor var_3185 = const()[name = string("op_3185"), val = tensor([1, 32, 1024])]; tensor var_3179 = transpose(perm = var_3178, x = var_3168_cast_fp16)[name = string("transpose_110")]; tensor attn_output_27 = reshape(shape = var_3185, x = var_3179)[name = string("attn_output_27")]; tensor var_3190 = const()[name = string("op_3190"), val = tensor([0, 2, 1])]; tensor squeeze_4_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263893120))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264548544))))[name = string("squeeze_4_quantized")]; string var_3206_pad_type_0 = const()[name = string("op_3206_pad_type_0"), val = string("valid")]; int32 var_3206_groups_0 = const()[name = string("op_3206_groups_0"), val = int32(1)]; tensor var_3206_strides_0 = const()[name = string("op_3206_strides_0"), val = tensor([1])]; tensor var_3206_pad_0 = const()[name = string("op_3206_pad_0"), val = tensor([0, 0])]; tensor var_3206_dilations_0 = const()[name = string("op_3206_dilations_0"), val = tensor([1])]; tensor var_3191 = transpose(perm = var_3190, x = attn_output_27)[name = string("transpose_109")]; tensor var_3206 = conv(dilations = var_3206_dilations_0, groups = var_3206_groups_0, pad = var_3206_pad_0, pad_type = var_3206_pad_type_0, strides = var_3206_strides_0, weight = squeeze_4_quantized, x = var_3191)[name = string("op_3206")]; tensor var_3210 = const()[name = string("op_3210"), val = tensor([0, 2, 1])]; int32 var_3217 = const()[name = string("op_3217"), val = int32(-1)]; fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_73 = transpose(perm = var_3210, x = var_3206)[name = string("transpose_108")]; tensor var_3223_cast_fp16 = mul(x = x_73, y = const_64_promoted_to_fp16)[name = string("op_3223_cast_fp16")]; bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)]; tensor input_91_cast_fp16 = concat(axis = var_3217, interleave = input_91_interleave_0, values = (x_73, var_3223_cast_fp16))[name = string("input_91_cast_fp16")]; tensor normed_127_axes_0 = const()[name = string("normed_127_axes_0"), val = tensor([-1])]; fp16 var_3215_to_fp16 = const()[name = string("op_3215_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_127_cast_fp16 = layer_norm(axes = normed_127_axes_0, epsilon = var_3215_to_fp16, x = input_91_cast_fp16)[name = string("normed_127_cast_fp16")]; tensor var_3228_split_sizes_0 = const()[name = string("op_3228_split_sizes_0"), val = tensor([640, 640])]; int32 var_3228_axis_0 = const()[name = string("op_3228_axis_0"), val = int32(-1)]; tensor var_3228_cast_fp16_0, tensor var_3228_cast_fp16_1 = split(axis = var_3228_axis_0, split_sizes = var_3228_split_sizes_0, x = normed_127_cast_fp16)[name = string("op_3228_cast_fp16")]; tensor var_3232_to_fp16 = const()[name = string("op_3232_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264549888)))]; tensor out_55_cast_fp16 = mul(x = var_3228_cast_fp16_0, y = var_3232_to_fp16)[name = string("out_55_cast_fp16")]; tensor x_75_cast_fp16 = add(x = x_65_cast_fp16, y = out_55_cast_fp16)[name = string("x_75_cast_fp16")]; int32 var_3246 = const()[name = string("op_3246"), val = int32(-1)]; fp16 const_66_promoted_to_fp16 = const()[name = string("const_66_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3252_cast_fp16 = mul(x = x_75_cast_fp16, y = const_66_promoted_to_fp16)[name = string("op_3252_cast_fp16")]; bool input_93_interleave_0 = const()[name = string("input_93_interleave_0"), val = bool(false)]; tensor input_93_cast_fp16 = concat(axis = var_3246, interleave = input_93_interleave_0, values = (x_75_cast_fp16, var_3252_cast_fp16))[name = string("input_93_cast_fp16")]; tensor normed_131_axes_0 = const()[name = string("normed_131_axes_0"), val = tensor([-1])]; fp16 var_3244_to_fp16 = const()[name = string("op_3244_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_131_cast_fp16 = layer_norm(axes = normed_131_axes_0, epsilon = var_3244_to_fp16, x = input_93_cast_fp16)[name = string("normed_131_cast_fp16")]; tensor var_3257_split_sizes_0 = const()[name = string("op_3257_split_sizes_0"), val = tensor([640, 640])]; int32 var_3257_axis_0 = const()[name = string("op_3257_axis_0"), val = int32(-1)]; tensor var_3257_cast_fp16_0, tensor var_3257_cast_fp16_1 = split(axis = var_3257_axis_0, split_sizes = var_3257_split_sizes_0, x = normed_131_cast_fp16)[name = string("op_3257_cast_fp16")]; tensor var_3261_to_fp16 = const()[name = string("op_3261_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264551232)))]; tensor out_57_cast_fp16 = mul(x = var_3257_cast_fp16_0, y = var_3261_to_fp16)[name = string("out_57_cast_fp16")]; tensor var_3275 = const()[name = string("op_3275"), val = tensor([0, 2, 1])]; tensor input_95_axes_0 = const()[name = string("input_95_axes_0"), val = tensor([2])]; tensor var_3276 = transpose(perm = var_3275, x = out_57_cast_fp16)[name = string("transpose_107")]; tensor input_95 = expand_dims(axes = input_95_axes_0, x = var_3276)[name = string("input_95")]; string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")]; tensor gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor([1, 1])]; tensor gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor([1, 1])]; int32 gate_17_groups_0 = const()[name = string("gate_17_groups_0"), val = int32(1)]; tensor gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = layers_4_mlp_gate_proj_weight_quantized, x = input_95)[name = string("gate_17")]; string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")]; tensor up_9_strides_0 = const()[name = string("up_9_strides_0"), val = tensor([1, 1])]; tensor up_9_pad_0 = const()[name = string("up_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor([1, 1])]; int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)]; tensor up_9 = conv(dilations = up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = layers_4_mlp_up_proj_weight_quantized, x = input_95)[name = string("up_9")]; string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = string("gate_19")]; tensor input_97 = mul(x = gate_19, y = up_9)[name = string("input_97")]; string var_3314_pad_type_0 = const()[name = string("op_3314_pad_type_0"), val = string("valid")]; tensor var_3314_strides_0 = const()[name = string("op_3314_strides_0"), val = tensor([1, 1])]; tensor var_3314_pad_0 = const()[name = string("op_3314_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3314_dilations_0 = const()[name = string("op_3314_dilations_0"), val = tensor([1, 1])]; int32 var_3314_groups_0 = const()[name = string("op_3314_groups_0"), val = int32(1)]; tensor var_3314 = conv(dilations = var_3314_dilations_0, groups = var_3314_groups_0, pad = var_3314_pad_0, pad_type = var_3314_pad_type_0, strides = var_3314_strides_0, weight = layers_4_mlp_down_proj_weight_quantized, x = input_97)[name = string("op_3314")]; tensor var_3316_axes_0 = const()[name = string("op_3316_axes_0"), val = tensor([2])]; tensor var_3316 = squeeze(axes = var_3316_axes_0, x = var_3314)[name = string("op_3316")]; tensor var_3320 = const()[name = string("op_3320"), val = tensor([0, 2, 1])]; int32 var_3327 = const()[name = string("op_3327"), val = int32(-1)]; fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_79 = transpose(perm = var_3320, x = var_3316)[name = string("transpose_106")]; tensor var_3333_cast_fp16 = mul(x = x_79, y = const_68_promoted_to_fp16)[name = string("op_3333_cast_fp16")]; bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)]; tensor input_99_cast_fp16 = concat(axis = var_3327, interleave = input_99_interleave_0, values = (x_79, var_3333_cast_fp16))[name = string("input_99_cast_fp16")]; tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; fp16 var_3325_to_fp16 = const()[name = string("op_3325_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_3325_to_fp16, x = input_99_cast_fp16)[name = string("normed_137_cast_fp16")]; tensor var_3338_split_sizes_0 = const()[name = string("op_3338_split_sizes_0"), val = tensor([640, 640])]; int32 var_3338_axis_0 = const()[name = string("op_3338_axis_0"), val = int32(-1)]; tensor var_3338_cast_fp16_0, tensor var_3338_cast_fp16_1 = split(axis = var_3338_axis_0, split_sizes = var_3338_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_3338_cast_fp16")]; tensor var_3342_to_fp16 = const()[name = string("op_3342_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264552576)))]; tensor out_59_cast_fp16 = mul(x = var_3338_cast_fp16_0, y = var_3342_to_fp16)[name = string("out_59_cast_fp16")]; tensor x_81_cast_fp16 = add(x = x_75_cast_fp16, y = out_59_cast_fp16)[name = string("x_81_cast_fp16")]; int32 var_3356 = const()[name = string("op_3356"), val = int32(-1)]; fp16 const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3362_cast_fp16 = mul(x = x_81_cast_fp16, y = const_70_promoted_to_fp16)[name = string("op_3362_cast_fp16")]; bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; tensor input_101_cast_fp16 = concat(axis = var_3356, interleave = input_101_interleave_0, values = (x_81_cast_fp16, var_3362_cast_fp16))[name = string("input_101_cast_fp16")]; tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; fp16 var_3354_to_fp16 = const()[name = string("op_3354_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_3354_to_fp16, x = input_101_cast_fp16)[name = string("normed_141_cast_fp16")]; tensor var_3367_split_sizes_0 = const()[name = string("op_3367_split_sizes_0"), val = tensor([640, 640])]; int32 var_3367_axis_0 = const()[name = string("op_3367_axis_0"), val = int32(-1)]; tensor var_3367_cast_fp16_0, tensor var_3367_cast_fp16_1 = split(axis = var_3367_axis_0, split_sizes = var_3367_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_3367_cast_fp16")]; tensor var_3371_to_fp16 = const()[name = string("op_3371_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264553920)))]; tensor out_61_cast_fp16 = mul(x = var_3367_cast_fp16_0, y = var_3371_to_fp16)[name = string("out_61_cast_fp16")]; tensor var_3385 = const()[name = string("op_3385"), val = tensor([0, 2, 1])]; tensor input_103_axes_0 = const()[name = string("input_103_axes_0"), val = tensor([2])]; tensor var_3386 = transpose(perm = var_3385, x = out_61_cast_fp16)[name = string("transpose_105")]; tensor input_103 = expand_dims(axes = input_103_axes_0, x = var_3386)[name = string("input_103")]; string var_3399_pad_type_0 = const()[name = string("op_3399_pad_type_0"), val = string("valid")]; tensor var_3399_strides_0 = const()[name = string("op_3399_strides_0"), val = tensor([1, 1])]; tensor var_3399_pad_0 = const()[name = string("op_3399_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3399_dilations_0 = const()[name = string("op_3399_dilations_0"), val = tensor([1, 1])]; int32 var_3399_groups_0 = const()[name = string("op_3399_groups_0"), val = int32(1)]; tensor var_3399 = conv(dilations = var_3399_dilations_0, groups = var_3399_groups_0, pad = var_3399_pad_0, pad_type = var_3399_pad_type_0, strides = var_3399_strides_0, weight = layers_5_self_attn_q_proj_weight_quantized, x = input_103)[name = string("op_3399")]; tensor var_3404 = const()[name = string("op_3404"), val = tensor([1, 4, 256, 32])]; tensor var_3405 = reshape(shape = var_3404, x = var_3399)[name = string("op_3405")]; tensor var_3410 = const()[name = string("op_3410"), val = tensor([0, 1, 3, 2])]; int32 var_3423 = const()[name = string("op_3423"), val = int32(-1)]; fp16 const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_31 = transpose(perm = var_3410, x = var_3405)[name = string("transpose_104")]; tensor var_3429_cast_fp16 = mul(x = q_31, y = const_72_promoted_to_fp16)[name = string("op_3429_cast_fp16")]; bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; tensor input_105_cast_fp16 = concat(axis = var_3423, interleave = input_105_interleave_0, values = (q_31, var_3429_cast_fp16))[name = string("input_105_cast_fp16")]; tensor normed_147_axes_0 = const()[name = string("normed_147_axes_0"), val = tensor([-1])]; fp16 var_3421_to_fp16 = const()[name = string("op_3421_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_147_cast_fp16 = layer_norm(axes = normed_147_axes_0, epsilon = var_3421_to_fp16, x = input_105_cast_fp16)[name = string("normed_147_cast_fp16")]; tensor var_3434_split_sizes_0 = const()[name = string("op_3434_split_sizes_0"), val = tensor([256, 256])]; int32 var_3434_axis_0 = const()[name = string("op_3434_axis_0"), val = int32(-1)]; tensor var_3434_cast_fp16_0, tensor var_3434_cast_fp16_1 = split(axis = var_3434_axis_0, split_sizes = var_3434_split_sizes_0, x = normed_147_cast_fp16)[name = string("op_3434_cast_fp16")]; tensor var_3438_to_fp16 = const()[name = string("op_3438_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264555264)))]; tensor out_63_cast_fp16 = mul(x = var_3434_cast_fp16_0, y = var_3438_to_fp16)[name = string("out_63_cast_fp16")]; string var_3451_pad_type_0 = const()[name = string("op_3451_pad_type_0"), val = string("valid")]; tensor var_3451_strides_0 = const()[name = string("op_3451_strides_0"), val = tensor([1, 1])]; tensor var_3451_pad_0 = const()[name = string("op_3451_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3451_dilations_0 = const()[name = string("op_3451_dilations_0"), val = tensor([1, 1])]; int32 var_3451_groups_0 = const()[name = string("op_3451_groups_0"), val = int32(1)]; tensor var_3451 = conv(dilations = var_3451_dilations_0, groups = var_3451_groups_0, pad = var_3451_pad_0, pad_type = var_3451_pad_type_0, strides = var_3451_strides_0, weight = layers_5_self_attn_k_proj_weight_quantized, x = input_103)[name = string("op_3451")]; tensor var_3456 = const()[name = string("op_3456"), val = tensor([1, 1, 256, 32])]; tensor var_3457 = reshape(shape = var_3456, x = var_3451)[name = string("op_3457")]; tensor var_3462 = const()[name = string("op_3462"), val = tensor([0, 1, 3, 2])]; int32 var_3475 = const()[name = string("op_3475"), val = int32(-1)]; fp16 const_74_promoted_to_fp16 = const()[name = string("const_74_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_31 = transpose(perm = var_3462, x = var_3457)[name = string("transpose_103")]; tensor var_3481_cast_fp16 = mul(x = k_31, y = const_74_promoted_to_fp16)[name = string("op_3481_cast_fp16")]; bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; tensor input_107_cast_fp16 = concat(axis = var_3475, interleave = input_107_interleave_0, values = (k_31, var_3481_cast_fp16))[name = string("input_107_cast_fp16")]; tensor normed_151_axes_0 = const()[name = string("normed_151_axes_0"), val = tensor([-1])]; fp16 var_3473_to_fp16 = const()[name = string("op_3473_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_151_cast_fp16 = layer_norm(axes = normed_151_axes_0, epsilon = var_3473_to_fp16, x = input_107_cast_fp16)[name = string("normed_151_cast_fp16")]; tensor var_3486_split_sizes_0 = const()[name = string("op_3486_split_sizes_0"), val = tensor([256, 256])]; int32 var_3486_axis_0 = const()[name = string("op_3486_axis_0"), val = int32(-1)]; tensor var_3486_cast_fp16_0, tensor var_3486_cast_fp16_1 = split(axis = var_3486_axis_0, split_sizes = var_3486_split_sizes_0, x = normed_151_cast_fp16)[name = string("op_3486_cast_fp16")]; tensor var_3490_to_fp16 = const()[name = string("op_3490_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264555840)))]; tensor out_65_cast_fp16 = mul(x = var_3486_cast_fp16_0, y = var_3490_to_fp16)[name = string("out_65_cast_fp16")]; string var_3503_pad_type_0 = const()[name = string("op_3503_pad_type_0"), val = string("valid")]; tensor var_3503_strides_0 = const()[name = string("op_3503_strides_0"), val = tensor([1, 1])]; tensor var_3503_pad_0 = const()[name = string("op_3503_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3503_dilations_0 = const()[name = string("op_3503_dilations_0"), val = tensor([1, 1])]; int32 var_3503_groups_0 = const()[name = string("op_3503_groups_0"), val = int32(1)]; tensor var_3503 = conv(dilations = var_3503_dilations_0, groups = var_3503_groups_0, pad = var_3503_pad_0, pad_type = var_3503_pad_type_0, strides = var_3503_strides_0, weight = layers_5_self_attn_v_proj_weight_quantized, x = input_103)[name = string("op_3503")]; tensor var_3508 = const()[name = string("op_3508"), val = tensor([1, 1, 256, 32])]; tensor var_3509 = reshape(shape = var_3508, x = var_3503)[name = string("op_3509")]; tensor var_3516 = mul(x = out_63_cast_fp16, y = cos)[name = string("op_3516")]; tensor var_3517_split_sizes_0 = const()[name = string("op_3517_split_sizes_0"), val = tensor([128, 128])]; int32 var_3517_axis_0 = const()[name = string("op_3517_axis_0"), val = int32(-1)]; tensor var_3517_0, tensor var_3517_1 = split(axis = var_3517_axis_0, split_sizes = var_3517_split_sizes_0, x = out_63_cast_fp16)[name = string("op_3517")]; fp16 const_76_promoted = const()[name = string("const_76_promoted"), val = fp16(-0x1p+0)]; tensor var_3519 = mul(x = var_3517_1, y = const_76_promoted)[name = string("op_3519")]; int32 var_3521 = const()[name = string("op_3521"), val = int32(-1)]; bool var_3522_interleave_0 = const()[name = string("op_3522_interleave_0"), val = bool(false)]; tensor var_3522 = concat(axis = var_3521, interleave = var_3522_interleave_0, values = (var_3519, var_3517_0))[name = string("op_3522")]; tensor var_3523 = mul(x = var_3522, y = sin)[name = string("op_3523")]; tensor q_35 = add(x = var_3516, y = var_3523)[name = string("q_35")]; tensor var_3526 = mul(x = out_65_cast_fp16, y = cos)[name = string("op_3526")]; tensor var_3527_split_sizes_0 = const()[name = string("op_3527_split_sizes_0"), val = tensor([128, 128])]; int32 var_3527_axis_0 = const()[name = string("op_3527_axis_0"), val = int32(-1)]; tensor var_3527_0, tensor var_3527_1 = split(axis = var_3527_axis_0, split_sizes = var_3527_split_sizes_0, x = out_65_cast_fp16)[name = string("op_3527")]; fp16 const_77_promoted = const()[name = string("const_77_promoted"), val = fp16(-0x1p+0)]; tensor var_3529 = mul(x = var_3527_1, y = const_77_promoted)[name = string("op_3529")]; int32 var_3531 = const()[name = string("op_3531"), val = int32(-1)]; bool var_3532_interleave_0 = const()[name = string("op_3532_interleave_0"), val = bool(false)]; tensor var_3532 = concat(axis = var_3531, interleave = var_3532_interleave_0, values = (var_3529, var_3527_0))[name = string("op_3532")]; tensor var_3533 = mul(x = var_3532, y = sin)[name = string("op_3533")]; tensor k_35 = add(x = var_3526, y = var_3533)[name = string("k_35")]; tensor var_3538_begin_0 = const()[name = string("op_3538_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_3538_end_0 = const()[name = string("op_3538_end_0"), val = tensor([6, 1, 2048, 256])]; tensor var_3538_end_mask_0 = const()[name = string("op_3538_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3538_squeeze_mask_0 = const()[name = string("op_3538_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_3538_cast_fp16 = slice_by_index(begin = var_3538_begin_0, end = var_3538_end_0, end_mask = var_3538_end_mask_0, squeeze_mask = var_3538_squeeze_mask_0, x = coreml_update_state_45)[name = string("op_3538_cast_fp16")]; tensor K_cache_11_axes_0 = const()[name = string("K_cache_11_axes_0"), val = tensor([0])]; tensor K_cache_11_cast_fp16 = expand_dims(axes = K_cache_11_axes_0, x = var_3538_cast_fp16)[name = string("K_cache_11_cast_fp16")]; tensor var_3543_begin_0 = const()[name = string("op_3543_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_3543_end_0 = const()[name = string("op_3543_end_0"), val = tensor([24, 1, 2048, 256])]; tensor var_3543_end_mask_0 = const()[name = string("op_3543_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3543_squeeze_mask_0 = const()[name = string("op_3543_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_3543_cast_fp16 = slice_by_index(begin = var_3543_begin_0, end = var_3543_end_0, end_mask = var_3543_end_mask_0, squeeze_mask = var_3543_squeeze_mask_0, x = coreml_update_state_45)[name = string("op_3543_cast_fp16")]; tensor V_cache_11_axes_0 = const()[name = string("V_cache_11_axes_0"), val = tensor([0])]; tensor V_cache_11_cast_fp16 = expand_dims(axes = V_cache_11_axes_0, x = var_3543_cast_fp16)[name = string("V_cache_11_cast_fp16")]; bool k_increment_11_transpose_x_0 = const()[name = string("k_increment_11_transpose_x_0"), val = bool(false)]; bool k_increment_11_transpose_y_0 = const()[name = string("k_increment_11_transpose_y_0"), val = bool(false)]; tensor k_increment_11 = matmul(transpose_x = k_increment_11_transpose_x_0, transpose_y = k_increment_11_transpose_y_0, x = update_mask, y = k_35)[name = string("k_increment_11")]; bool v_increment_11_transpose_x_1 = const()[name = string("v_increment_11_transpose_x_1"), val = bool(false)]; bool v_increment_11_transpose_y_1 = const()[name = string("v_increment_11_transpose_y_1"), val = bool(true)]; tensor v_increment_11 = matmul(transpose_x = v_increment_11_transpose_x_1, transpose_y = v_increment_11_transpose_y_1, x = update_mask, y = var_3509)[name = string("v_increment_11")]; tensor var_3561_cast_fp16 = mul(x = K_cache_11_cast_fp16, y = var_1125_cast_fp16)[name = string("op_3561_cast_fp16")]; tensor K_new_11_cast_fp16 = add(x = var_3561_cast_fp16, y = k_increment_11)[name = string("K_new_11_cast_fp16")]; tensor var_3567_cast_fp16 = mul(x = V_cache_11_cast_fp16, y = var_1125_cast_fp16)[name = string("op_3567_cast_fp16")]; tensor V_new_11_cast_fp16 = add(x = var_3567_cast_fp16, y = v_increment_11)[name = string("V_new_11_cast_fp16")]; tensor var_3571_axes_0 = const()[name = string("op_3571_axes_0"), val = tensor([0])]; tensor var_3571_cast_fp16 = squeeze(axes = var_3571_axes_0, x = K_new_11_cast_fp16)[name = string("op_3571_cast_fp16")]; tensor concat_20 = const()[name = string("concat_20"), val = tensor([5, 0, 0, 0])]; tensor concat_21 = const()[name = string("concat_21"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_20, begin_mask = kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_21, end_mask = kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_11_stride_0, update = var_3571_cast_fp16, x = coreml_update_state_45)[name = string("kv_cache_0_internal_tensor_assign_11_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_11_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_46_write_state")]; tensor coreml_update_state_46 = read_state(input = kv_cache_0)[name = string("coreml_update_state_46")]; tensor var_3578_axes_0 = const()[name = string("op_3578_axes_0"), val = tensor([0])]; tensor var_3578_cast_fp16 = squeeze(axes = var_3578_axes_0, x = V_new_11_cast_fp16)[name = string("op_3578_cast_fp16")]; tensor concat_22 = const()[name = string("concat_22"), val = tensor([23, 0, 0, 0])]; tensor concat_23 = const()[name = string("concat_23"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_22, begin_mask = kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_23, end_mask = kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_12_stride_0, update = var_3578_cast_fp16, x = coreml_update_state_46)[name = string("kv_cache_0_internal_tensor_assign_12_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_12_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_47_write_state")]; tensor coreml_update_state_47 = read_state(input = kv_cache_0)[name = string("coreml_update_state_47")]; tensor hidden_states_41_axes_0 = const()[name = string("hidden_states_41_axes_0"), val = tensor([2])]; tensor hidden_states_41_cast_fp16 = expand_dims(axes = hidden_states_41_axes_0, x = K_new_11_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor var_3591 = const()[name = string("op_3591"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_43_cast_fp16 = tile(reps = var_3591, x = hidden_states_41_cast_fp16)[name = string("hidden_states_43_cast_fp16")]; tensor var_3597 = const()[name = string("op_3597"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_11_cast_fp16 = reshape(shape = var_3597, x = hidden_states_43_cast_fp16)[name = string("K_expanded_11_cast_fp16")]; tensor hidden_states_45_axes_0 = const()[name = string("hidden_states_45_axes_0"), val = tensor([2])]; tensor hidden_states_45_cast_fp16 = expand_dims(axes = hidden_states_45_axes_0, x = V_new_11_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; tensor var_3606 = const()[name = string("op_3606"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_47_cast_fp16 = tile(reps = var_3606, x = hidden_states_45_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; tensor var_3612 = const()[name = string("op_3612"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_11_cast_fp16 = reshape(shape = var_3612, x = hidden_states_47_cast_fp16)[name = string("V_expanded_11_cast_fp16")]; bool var_3627_transpose_x_1 = const()[name = string("op_3627_transpose_x_1"), val = bool(false)]; bool var_3627_transpose_y_1 = const()[name = string("op_3627_transpose_y_1"), val = bool(true)]; tensor var_3627_cast_fp16 = matmul(transpose_x = var_3627_transpose_x_1, transpose_y = var_3627_transpose_y_1, x = q_35, y = K_expanded_11_cast_fp16)[name = string("op_3627_cast_fp16")]; fp16 var_3628_to_fp16 = const()[name = string("op_3628_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_31_cast_fp16 = mul(x = var_3627_cast_fp16, y = var_3628_to_fp16)[name = string("attn_weights_31_cast_fp16")]; tensor attn_weights_33_cast_fp16 = add(x = attn_weights_31_cast_fp16, y = causal_mask)[name = string("attn_weights_33_cast_fp16")]; int32 var_3637 = const()[name = string("op_3637"), val = int32(-1)]; tensor var_3639_cast_fp16 = softmax(axis = var_3637, x = attn_weights_33_cast_fp16)[name = string("op_3639_cast_fp16")]; bool var_3655_transpose_x_0 = const()[name = string("op_3655_transpose_x_0"), val = bool(false)]; bool var_3655_transpose_y_0 = const()[name = string("op_3655_transpose_y_0"), val = bool(false)]; tensor var_3655_cast_fp16 = matmul(transpose_x = var_3655_transpose_x_0, transpose_y = var_3655_transpose_y_0, x = var_3639_cast_fp16, y = V_expanded_11_cast_fp16)[name = string("op_3655_cast_fp16")]; tensor var_3665 = const()[name = string("op_3665"), val = tensor([0, 2, 1, 3])]; tensor var_3672 = const()[name = string("op_3672"), val = tensor([1, 32, 1024])]; tensor var_3666 = transpose(perm = var_3665, x = var_3655_cast_fp16)[name = string("transpose_102")]; tensor attn_output_33 = reshape(shape = var_3672, x = var_3666)[name = string("attn_output_33")]; tensor var_3677 = const()[name = string("op_3677"), val = tensor([0, 2, 1])]; tensor squeeze_5_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264556416))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265211840))))[name = string("squeeze_5_quantized")]; string var_3693_pad_type_0 = const()[name = string("op_3693_pad_type_0"), val = string("valid")]; int32 var_3693_groups_0 = const()[name = string("op_3693_groups_0"), val = int32(1)]; tensor var_3693_strides_0 = const()[name = string("op_3693_strides_0"), val = tensor([1])]; tensor var_3693_pad_0 = const()[name = string("op_3693_pad_0"), val = tensor([0, 0])]; tensor var_3693_dilations_0 = const()[name = string("op_3693_dilations_0"), val = tensor([1])]; tensor var_3678 = transpose(perm = var_3677, x = attn_output_33)[name = string("transpose_101")]; tensor var_3693 = conv(dilations = var_3693_dilations_0, groups = var_3693_groups_0, pad = var_3693_pad_0, pad_type = var_3693_pad_type_0, strides = var_3693_strides_0, weight = squeeze_5_quantized, x = var_3678)[name = string("op_3693")]; tensor var_3697 = const()[name = string("op_3697"), val = tensor([0, 2, 1])]; int32 var_3704 = const()[name = string("op_3704"), val = int32(-1)]; fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_89 = transpose(perm = var_3697, x = var_3693)[name = string("transpose_100")]; tensor var_3710_cast_fp16 = mul(x = x_89, y = const_78_promoted_to_fp16)[name = string("op_3710_cast_fp16")]; bool input_111_interleave_0 = const()[name = string("input_111_interleave_0"), val = bool(false)]; tensor input_111_cast_fp16 = concat(axis = var_3704, interleave = input_111_interleave_0, values = (x_89, var_3710_cast_fp16))[name = string("input_111_cast_fp16")]; tensor normed_155_axes_0 = const()[name = string("normed_155_axes_0"), val = tensor([-1])]; fp16 var_3702_to_fp16 = const()[name = string("op_3702_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_155_cast_fp16 = layer_norm(axes = normed_155_axes_0, epsilon = var_3702_to_fp16, x = input_111_cast_fp16)[name = string("normed_155_cast_fp16")]; tensor var_3715_split_sizes_0 = const()[name = string("op_3715_split_sizes_0"), val = tensor([640, 640])]; int32 var_3715_axis_0 = const()[name = string("op_3715_axis_0"), val = int32(-1)]; tensor var_3715_cast_fp16_0, tensor var_3715_cast_fp16_1 = split(axis = var_3715_axis_0, split_sizes = var_3715_split_sizes_0, x = normed_155_cast_fp16)[name = string("op_3715_cast_fp16")]; tensor var_3719_to_fp16 = const()[name = string("op_3719_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265213184)))]; tensor out_67_cast_fp16 = mul(x = var_3715_cast_fp16_0, y = var_3719_to_fp16)[name = string("out_67_cast_fp16")]; tensor x_91_cast_fp16 = add(x = x_81_cast_fp16, y = out_67_cast_fp16)[name = string("x_91_cast_fp16")]; int32 var_3733 = const()[name = string("op_3733"), val = int32(-1)]; fp16 const_80_promoted_to_fp16 = const()[name = string("const_80_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3739_cast_fp16 = mul(x = x_91_cast_fp16, y = const_80_promoted_to_fp16)[name = string("op_3739_cast_fp16")]; bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; tensor input_113_cast_fp16 = concat(axis = var_3733, interleave = input_113_interleave_0, values = (x_91_cast_fp16, var_3739_cast_fp16))[name = string("input_113_cast_fp16")]; tensor normed_159_axes_0 = const()[name = string("normed_159_axes_0"), val = tensor([-1])]; fp16 var_3731_to_fp16 = const()[name = string("op_3731_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_159_cast_fp16 = layer_norm(axes = normed_159_axes_0, epsilon = var_3731_to_fp16, x = input_113_cast_fp16)[name = string("normed_159_cast_fp16")]; tensor var_3744_split_sizes_0 = const()[name = string("op_3744_split_sizes_0"), val = tensor([640, 640])]; int32 var_3744_axis_0 = const()[name = string("op_3744_axis_0"), val = int32(-1)]; tensor var_3744_cast_fp16_0, tensor var_3744_cast_fp16_1 = split(axis = var_3744_axis_0, split_sizes = var_3744_split_sizes_0, x = normed_159_cast_fp16)[name = string("op_3744_cast_fp16")]; tensor var_3748_to_fp16 = const()[name = string("op_3748_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265214528)))]; tensor out_69_cast_fp16 = mul(x = var_3744_cast_fp16_0, y = var_3748_to_fp16)[name = string("out_69_cast_fp16")]; tensor var_3762 = const()[name = string("op_3762"), val = tensor([0, 2, 1])]; tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; tensor var_3763 = transpose(perm = var_3762, x = out_69_cast_fp16)[name = string("transpose_99")]; tensor input_115 = expand_dims(axes = input_115_axes_0, x = var_3763)[name = string("input_115")]; string gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")]; tensor gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor([1, 1])]; tensor gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor([1, 1])]; int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)]; tensor gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, strides = gate_21_strides_0, weight = layers_5_mlp_gate_proj_weight_quantized, x = input_115)[name = string("gate_21")]; string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")]; tensor up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor([1, 1])]; tensor up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor([1, 1])]; int32 up_11_groups_0 = const()[name = string("up_11_groups_0"), val = int32(1)]; tensor up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = layers_5_mlp_up_proj_weight_quantized, x = input_115)[name = string("up_11")]; string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")]; tensor input_117 = mul(x = gate_23, y = up_11)[name = string("input_117")]; string var_3801_pad_type_0 = const()[name = string("op_3801_pad_type_0"), val = string("valid")]; tensor var_3801_strides_0 = const()[name = string("op_3801_strides_0"), val = tensor([1, 1])]; tensor var_3801_pad_0 = const()[name = string("op_3801_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3801_dilations_0 = const()[name = string("op_3801_dilations_0"), val = tensor([1, 1])]; int32 var_3801_groups_0 = const()[name = string("op_3801_groups_0"), val = int32(1)]; tensor var_3801 = conv(dilations = var_3801_dilations_0, groups = var_3801_groups_0, pad = var_3801_pad_0, pad_type = var_3801_pad_type_0, strides = var_3801_strides_0, weight = layers_5_mlp_down_proj_weight_quantized, x = input_117)[name = string("op_3801")]; tensor var_3803_axes_0 = const()[name = string("op_3803_axes_0"), val = tensor([2])]; tensor var_3803 = squeeze(axes = var_3803_axes_0, x = var_3801)[name = string("op_3803")]; tensor var_3807 = const()[name = string("op_3807"), val = tensor([0, 2, 1])]; int32 var_3814 = const()[name = string("op_3814"), val = int32(-1)]; fp16 const_82_promoted_to_fp16 = const()[name = string("const_82_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_95 = transpose(perm = var_3807, x = var_3803)[name = string("transpose_98")]; tensor var_3820_cast_fp16 = mul(x = x_95, y = const_82_promoted_to_fp16)[name = string("op_3820_cast_fp16")]; bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; tensor input_119_cast_fp16 = concat(axis = var_3814, interleave = input_119_interleave_0, values = (x_95, var_3820_cast_fp16))[name = string("input_119_cast_fp16")]; tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; fp16 var_3812_to_fp16 = const()[name = string("op_3812_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_3812_to_fp16, x = input_119_cast_fp16)[name = string("normed_165_cast_fp16")]; tensor var_3825_split_sizes_0 = const()[name = string("op_3825_split_sizes_0"), val = tensor([640, 640])]; int32 var_3825_axis_0 = const()[name = string("op_3825_axis_0"), val = int32(-1)]; tensor var_3825_cast_fp16_0, tensor var_3825_cast_fp16_1 = split(axis = var_3825_axis_0, split_sizes = var_3825_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_3825_cast_fp16")]; tensor var_3829_to_fp16 = const()[name = string("op_3829_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265215872)))]; tensor out_71_cast_fp16 = mul(x = var_3825_cast_fp16_0, y = var_3829_to_fp16)[name = string("out_71_cast_fp16")]; tensor x_97_cast_fp16 = add(x = x_91_cast_fp16, y = out_71_cast_fp16)[name = string("x_97_cast_fp16")]; int32 var_3843 = const()[name = string("op_3843"), val = int32(-1)]; fp16 const_84_promoted_to_fp16 = const()[name = string("const_84_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3849_cast_fp16 = mul(x = x_97_cast_fp16, y = const_84_promoted_to_fp16)[name = string("op_3849_cast_fp16")]; bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; tensor input_121_cast_fp16 = concat(axis = var_3843, interleave = input_121_interleave_0, values = (x_97_cast_fp16, var_3849_cast_fp16))[name = string("input_121_cast_fp16")]; tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; fp16 var_3841_to_fp16 = const()[name = string("op_3841_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_3841_to_fp16, x = input_121_cast_fp16)[name = string("normed_169_cast_fp16")]; tensor var_3854_split_sizes_0 = const()[name = string("op_3854_split_sizes_0"), val = tensor([640, 640])]; int32 var_3854_axis_0 = const()[name = string("op_3854_axis_0"), val = int32(-1)]; tensor var_3854_cast_fp16_0, tensor var_3854_cast_fp16_1 = split(axis = var_3854_axis_0, split_sizes = var_3854_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_3854_cast_fp16")]; tensor var_3858_to_fp16 = const()[name = string("op_3858_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265217216)))]; tensor out_73_cast_fp16 = mul(x = var_3854_cast_fp16_0, y = var_3858_to_fp16)[name = string("out_73_cast_fp16")]; tensor var_3872 = const()[name = string("op_3872"), val = tensor([0, 2, 1])]; tensor input_123_axes_0 = const()[name = string("input_123_axes_0"), val = tensor([2])]; tensor var_3873 = transpose(perm = var_3872, x = out_73_cast_fp16)[name = string("transpose_97")]; tensor input_123 = expand_dims(axes = input_123_axes_0, x = var_3873)[name = string("input_123")]; string var_3886_pad_type_0 = const()[name = string("op_3886_pad_type_0"), val = string("valid")]; tensor var_3886_strides_0 = const()[name = string("op_3886_strides_0"), val = tensor([1, 1])]; tensor var_3886_pad_0 = const()[name = string("op_3886_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3886_dilations_0 = const()[name = string("op_3886_dilations_0"), val = tensor([1, 1])]; int32 var_3886_groups_0 = const()[name = string("op_3886_groups_0"), val = int32(1)]; tensor var_3886 = conv(dilations = var_3886_dilations_0, groups = var_3886_groups_0, pad = var_3886_pad_0, pad_type = var_3886_pad_type_0, strides = var_3886_strides_0, weight = layers_6_self_attn_q_proj_weight_quantized, x = input_123)[name = string("op_3886")]; tensor var_3891 = const()[name = string("op_3891"), val = tensor([1, 4, 256, 32])]; tensor var_3892 = reshape(shape = var_3891, x = var_3886)[name = string("op_3892")]; tensor var_3897 = const()[name = string("op_3897"), val = tensor([0, 1, 3, 2])]; int32 var_3910 = const()[name = string("op_3910"), val = int32(-1)]; fp16 const_86_promoted_to_fp16 = const()[name = string("const_86_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_37 = transpose(perm = var_3897, x = var_3892)[name = string("transpose_96")]; tensor var_3916_cast_fp16 = mul(x = q_37, y = const_86_promoted_to_fp16)[name = string("op_3916_cast_fp16")]; bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; tensor input_125_cast_fp16 = concat(axis = var_3910, interleave = input_125_interleave_0, values = (q_37, var_3916_cast_fp16))[name = string("input_125_cast_fp16")]; tensor normed_175_axes_0 = const()[name = string("normed_175_axes_0"), val = tensor([-1])]; fp16 var_3908_to_fp16 = const()[name = string("op_3908_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_175_cast_fp16 = layer_norm(axes = normed_175_axes_0, epsilon = var_3908_to_fp16, x = input_125_cast_fp16)[name = string("normed_175_cast_fp16")]; tensor var_3921_split_sizes_0 = const()[name = string("op_3921_split_sizes_0"), val = tensor([256, 256])]; int32 var_3921_axis_0 = const()[name = string("op_3921_axis_0"), val = int32(-1)]; tensor var_3921_cast_fp16_0, tensor var_3921_cast_fp16_1 = split(axis = var_3921_axis_0, split_sizes = var_3921_split_sizes_0, x = normed_175_cast_fp16)[name = string("op_3921_cast_fp16")]; tensor var_3925_to_fp16 = const()[name = string("op_3925_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265218560)))]; tensor out_75_cast_fp16 = mul(x = var_3921_cast_fp16_0, y = var_3925_to_fp16)[name = string("out_75_cast_fp16")]; string var_3938_pad_type_0 = const()[name = string("op_3938_pad_type_0"), val = string("valid")]; tensor var_3938_strides_0 = const()[name = string("op_3938_strides_0"), val = tensor([1, 1])]; tensor var_3938_pad_0 = const()[name = string("op_3938_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3938_dilations_0 = const()[name = string("op_3938_dilations_0"), val = tensor([1, 1])]; int32 var_3938_groups_0 = const()[name = string("op_3938_groups_0"), val = int32(1)]; tensor var_3938 = conv(dilations = var_3938_dilations_0, groups = var_3938_groups_0, pad = var_3938_pad_0, pad_type = var_3938_pad_type_0, strides = var_3938_strides_0, weight = layers_6_self_attn_k_proj_weight_quantized, x = input_123)[name = string("op_3938")]; tensor var_3943 = const()[name = string("op_3943"), val = tensor([1, 1, 256, 32])]; tensor var_3944 = reshape(shape = var_3943, x = var_3938)[name = string("op_3944")]; tensor var_3949 = const()[name = string("op_3949"), val = tensor([0, 1, 3, 2])]; int32 var_3962 = const()[name = string("op_3962"), val = int32(-1)]; fp16 const_88_promoted_to_fp16 = const()[name = string("const_88_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_37 = transpose(perm = var_3949, x = var_3944)[name = string("transpose_95")]; tensor var_3968_cast_fp16 = mul(x = k_37, y = const_88_promoted_to_fp16)[name = string("op_3968_cast_fp16")]; bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; tensor input_127_cast_fp16 = concat(axis = var_3962, interleave = input_127_interleave_0, values = (k_37, var_3968_cast_fp16))[name = string("input_127_cast_fp16")]; tensor normed_179_axes_0 = const()[name = string("normed_179_axes_0"), val = tensor([-1])]; fp16 var_3960_to_fp16 = const()[name = string("op_3960_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_179_cast_fp16 = layer_norm(axes = normed_179_axes_0, epsilon = var_3960_to_fp16, x = input_127_cast_fp16)[name = string("normed_179_cast_fp16")]; tensor var_3973_split_sizes_0 = const()[name = string("op_3973_split_sizes_0"), val = tensor([256, 256])]; int32 var_3973_axis_0 = const()[name = string("op_3973_axis_0"), val = int32(-1)]; tensor var_3973_cast_fp16_0, tensor var_3973_cast_fp16_1 = split(axis = var_3973_axis_0, split_sizes = var_3973_split_sizes_0, x = normed_179_cast_fp16)[name = string("op_3973_cast_fp16")]; tensor var_3977_to_fp16 = const()[name = string("op_3977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265219136)))]; tensor out_77_cast_fp16 = mul(x = var_3973_cast_fp16_0, y = var_3977_to_fp16)[name = string("out_77_cast_fp16")]; string var_3990_pad_type_0 = const()[name = string("op_3990_pad_type_0"), val = string("valid")]; tensor var_3990_strides_0 = const()[name = string("op_3990_strides_0"), val = tensor([1, 1])]; tensor var_3990_pad_0 = const()[name = string("op_3990_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3990_dilations_0 = const()[name = string("op_3990_dilations_0"), val = tensor([1, 1])]; int32 var_3990_groups_0 = const()[name = string("op_3990_groups_0"), val = int32(1)]; tensor var_3990 = conv(dilations = var_3990_dilations_0, groups = var_3990_groups_0, pad = var_3990_pad_0, pad_type = var_3990_pad_type_0, strides = var_3990_strides_0, weight = layers_6_self_attn_v_proj_weight_quantized, x = input_123)[name = string("op_3990")]; tensor var_3995 = const()[name = string("op_3995"), val = tensor([1, 1, 256, 32])]; tensor var_3996 = reshape(shape = var_3995, x = var_3990)[name = string("op_3996")]; tensor var_4003 = mul(x = out_75_cast_fp16, y = cos_1)[name = string("op_4003")]; tensor var_4004_split_sizes_0 = const()[name = string("op_4004_split_sizes_0"), val = tensor([128, 128])]; int32 var_4004_axis_0 = const()[name = string("op_4004_axis_0"), val = int32(-1)]; tensor var_4004_0, tensor var_4004_1 = split(axis = var_4004_axis_0, split_sizes = var_4004_split_sizes_0, x = out_75_cast_fp16)[name = string("op_4004")]; fp16 const_90_promoted = const()[name = string("const_90_promoted"), val = fp16(-0x1p+0)]; tensor var_4006 = mul(x = var_4004_1, y = const_90_promoted)[name = string("op_4006")]; int32 var_4008 = const()[name = string("op_4008"), val = int32(-1)]; bool var_4009_interleave_0 = const()[name = string("op_4009_interleave_0"), val = bool(false)]; tensor var_4009 = concat(axis = var_4008, interleave = var_4009_interleave_0, values = (var_4006, var_4004_0))[name = string("op_4009")]; tensor var_4010 = mul(x = var_4009, y = sin_1)[name = string("op_4010")]; tensor q_41 = add(x = var_4003, y = var_4010)[name = string("q_41")]; tensor var_4013 = mul(x = out_77_cast_fp16, y = cos_1)[name = string("op_4013")]; tensor var_4014_split_sizes_0 = const()[name = string("op_4014_split_sizes_0"), val = tensor([128, 128])]; int32 var_4014_axis_0 = const()[name = string("op_4014_axis_0"), val = int32(-1)]; tensor var_4014_0, tensor var_4014_1 = split(axis = var_4014_axis_0, split_sizes = var_4014_split_sizes_0, x = out_77_cast_fp16)[name = string("op_4014")]; fp16 const_91_promoted = const()[name = string("const_91_promoted"), val = fp16(-0x1p+0)]; tensor var_4016 = mul(x = var_4014_1, y = const_91_promoted)[name = string("op_4016")]; int32 var_4018 = const()[name = string("op_4018"), val = int32(-1)]; bool var_4019_interleave_0 = const()[name = string("op_4019_interleave_0"), val = bool(false)]; tensor var_4019 = concat(axis = var_4018, interleave = var_4019_interleave_0, values = (var_4016, var_4014_0))[name = string("op_4019")]; tensor var_4020 = mul(x = var_4019, y = sin_1)[name = string("op_4020")]; tensor k_41 = add(x = var_4013, y = var_4020)[name = string("k_41")]; tensor var_4025_begin_0 = const()[name = string("op_4025_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_4025_end_0 = const()[name = string("op_4025_end_0"), val = tensor([7, 1, 2048, 256])]; tensor var_4025_end_mask_0 = const()[name = string("op_4025_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4025_squeeze_mask_0 = const()[name = string("op_4025_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_4025_cast_fp16 = slice_by_index(begin = var_4025_begin_0, end = var_4025_end_0, end_mask = var_4025_end_mask_0, squeeze_mask = var_4025_squeeze_mask_0, x = coreml_update_state_47)[name = string("op_4025_cast_fp16")]; tensor K_cache_13_axes_0 = const()[name = string("K_cache_13_axes_0"), val = tensor([0])]; tensor K_cache_13_cast_fp16 = expand_dims(axes = K_cache_13_axes_0, x = var_4025_cast_fp16)[name = string("K_cache_13_cast_fp16")]; tensor var_4030_begin_0 = const()[name = string("op_4030_begin_0"), val = tensor([24, 0, 0, 0])]; tensor var_4030_end_0 = const()[name = string("op_4030_end_0"), val = tensor([25, 1, 2048, 256])]; tensor var_4030_end_mask_0 = const()[name = string("op_4030_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4030_squeeze_mask_0 = const()[name = string("op_4030_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_4030_cast_fp16 = slice_by_index(begin = var_4030_begin_0, end = var_4030_end_0, end_mask = var_4030_end_mask_0, squeeze_mask = var_4030_squeeze_mask_0, x = coreml_update_state_47)[name = string("op_4030_cast_fp16")]; tensor V_cache_13_axes_0 = const()[name = string("V_cache_13_axes_0"), val = tensor([0])]; tensor V_cache_13_cast_fp16 = expand_dims(axes = V_cache_13_axes_0, x = var_4030_cast_fp16)[name = string("V_cache_13_cast_fp16")]; bool k_increment_13_transpose_x_0 = const()[name = string("k_increment_13_transpose_x_0"), val = bool(false)]; bool k_increment_13_transpose_y_0 = const()[name = string("k_increment_13_transpose_y_0"), val = bool(false)]; tensor k_increment_13 = matmul(transpose_x = k_increment_13_transpose_x_0, transpose_y = k_increment_13_transpose_y_0, x = update_mask, y = k_41)[name = string("k_increment_13")]; bool v_increment_13_transpose_x_1 = const()[name = string("v_increment_13_transpose_x_1"), val = bool(false)]; bool v_increment_13_transpose_y_1 = const()[name = string("v_increment_13_transpose_y_1"), val = bool(true)]; tensor v_increment_13 = matmul(transpose_x = v_increment_13_transpose_x_1, transpose_y = v_increment_13_transpose_y_1, x = update_mask, y = var_3996)[name = string("v_increment_13")]; tensor var_4048_cast_fp16 = mul(x = K_cache_13_cast_fp16, y = var_1125_cast_fp16)[name = string("op_4048_cast_fp16")]; tensor K_new_13_cast_fp16 = add(x = var_4048_cast_fp16, y = k_increment_13)[name = string("K_new_13_cast_fp16")]; tensor var_4054_cast_fp16 = mul(x = V_cache_13_cast_fp16, y = var_1125_cast_fp16)[name = string("op_4054_cast_fp16")]; tensor V_new_13_cast_fp16 = add(x = var_4054_cast_fp16, y = v_increment_13)[name = string("V_new_13_cast_fp16")]; tensor var_4058_axes_0 = const()[name = string("op_4058_axes_0"), val = tensor([0])]; tensor var_4058_cast_fp16 = squeeze(axes = var_4058_axes_0, x = K_new_13_cast_fp16)[name = string("op_4058_cast_fp16")]; tensor concat_24 = const()[name = string("concat_24"), val = tensor([6, 0, 0, 0])]; tensor concat_25 = const()[name = string("concat_25"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_24, begin_mask = kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_25, end_mask = kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_13_stride_0, update = var_4058_cast_fp16, x = coreml_update_state_47)[name = string("kv_cache_0_internal_tensor_assign_13_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_13_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_48_write_state")]; tensor coreml_update_state_48 = read_state(input = kv_cache_0)[name = string("coreml_update_state_48")]; tensor var_4065_axes_0 = const()[name = string("op_4065_axes_0"), val = tensor([0])]; tensor var_4065_cast_fp16 = squeeze(axes = var_4065_axes_0, x = V_new_13_cast_fp16)[name = string("op_4065_cast_fp16")]; tensor concat_26 = const()[name = string("concat_26"), val = tensor([24, 0, 0, 0])]; tensor concat_27 = const()[name = string("concat_27"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_26, begin_mask = kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_27, end_mask = kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_14_stride_0, update = var_4065_cast_fp16, x = coreml_update_state_48)[name = string("kv_cache_0_internal_tensor_assign_14_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_14_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_49_write_state")]; tensor coreml_update_state_49 = read_state(input = kv_cache_0)[name = string("coreml_update_state_49")]; tensor hidden_states_49_axes_0 = const()[name = string("hidden_states_49_axes_0"), val = tensor([2])]; tensor hidden_states_49_cast_fp16 = expand_dims(axes = hidden_states_49_axes_0, x = K_new_13_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; tensor var_4078 = const()[name = string("op_4078"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_51_cast_fp16 = tile(reps = var_4078, x = hidden_states_49_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; tensor var_4084 = const()[name = string("op_4084"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_13_cast_fp16 = reshape(shape = var_4084, x = hidden_states_51_cast_fp16)[name = string("K_expanded_13_cast_fp16")]; tensor hidden_states_53_axes_0 = const()[name = string("hidden_states_53_axes_0"), val = tensor([2])]; tensor hidden_states_53_cast_fp16 = expand_dims(axes = hidden_states_53_axes_0, x = V_new_13_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor var_4093 = const()[name = string("op_4093"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_55_cast_fp16 = tile(reps = var_4093, x = hidden_states_53_cast_fp16)[name = string("hidden_states_55_cast_fp16")]; tensor var_4099 = const()[name = string("op_4099"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_13_cast_fp16 = reshape(shape = var_4099, x = hidden_states_55_cast_fp16)[name = string("V_expanded_13_cast_fp16")]; bool var_4114_transpose_x_1 = const()[name = string("op_4114_transpose_x_1"), val = bool(false)]; bool var_4114_transpose_y_1 = const()[name = string("op_4114_transpose_y_1"), val = bool(true)]; tensor var_4114_cast_fp16 = matmul(transpose_x = var_4114_transpose_x_1, transpose_y = var_4114_transpose_y_1, x = q_41, y = K_expanded_13_cast_fp16)[name = string("op_4114_cast_fp16")]; fp16 var_4115_to_fp16 = const()[name = string("op_4115_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_37_cast_fp16 = mul(x = var_4114_cast_fp16, y = var_4115_to_fp16)[name = string("attn_weights_37_cast_fp16")]; tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask)[name = string("attn_weights_39_cast_fp16")]; int32 var_4124 = const()[name = string("op_4124"), val = int32(-1)]; tensor var_4126_cast_fp16 = softmax(axis = var_4124, x = attn_weights_39_cast_fp16)[name = string("op_4126_cast_fp16")]; bool var_4142_transpose_x_0 = const()[name = string("op_4142_transpose_x_0"), val = bool(false)]; bool var_4142_transpose_y_0 = const()[name = string("op_4142_transpose_y_0"), val = bool(false)]; tensor var_4142_cast_fp16 = matmul(transpose_x = var_4142_transpose_x_0, transpose_y = var_4142_transpose_y_0, x = var_4126_cast_fp16, y = V_expanded_13_cast_fp16)[name = string("op_4142_cast_fp16")]; tensor var_4152 = const()[name = string("op_4152"), val = tensor([0, 2, 1, 3])]; tensor var_4159 = const()[name = string("op_4159"), val = tensor([1, 32, 1024])]; tensor var_4153 = transpose(perm = var_4152, x = var_4142_cast_fp16)[name = string("transpose_94")]; tensor attn_output_39 = reshape(shape = var_4159, x = var_4153)[name = string("attn_output_39")]; tensor var_4164 = const()[name = string("op_4164"), val = tensor([0, 2, 1])]; tensor squeeze_6_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265219712))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265875136))))[name = string("squeeze_6_quantized")]; string var_4180_pad_type_0 = const()[name = string("op_4180_pad_type_0"), val = string("valid")]; int32 var_4180_groups_0 = const()[name = string("op_4180_groups_0"), val = int32(1)]; tensor var_4180_strides_0 = const()[name = string("op_4180_strides_0"), val = tensor([1])]; tensor var_4180_pad_0 = const()[name = string("op_4180_pad_0"), val = tensor([0, 0])]; tensor var_4180_dilations_0 = const()[name = string("op_4180_dilations_0"), val = tensor([1])]; tensor var_4165 = transpose(perm = var_4164, x = attn_output_39)[name = string("transpose_93")]; tensor var_4180 = conv(dilations = var_4180_dilations_0, groups = var_4180_groups_0, pad = var_4180_pad_0, pad_type = var_4180_pad_type_0, strides = var_4180_strides_0, weight = squeeze_6_quantized, x = var_4165)[name = string("op_4180")]; tensor var_4184 = const()[name = string("op_4184"), val = tensor([0, 2, 1])]; int32 var_4191 = const()[name = string("op_4191"), val = int32(-1)]; fp16 const_92_promoted_to_fp16 = const()[name = string("const_92_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_105 = transpose(perm = var_4184, x = var_4180)[name = string("transpose_92")]; tensor var_4197_cast_fp16 = mul(x = x_105, y = const_92_promoted_to_fp16)[name = string("op_4197_cast_fp16")]; bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; tensor input_131_cast_fp16 = concat(axis = var_4191, interleave = input_131_interleave_0, values = (x_105, var_4197_cast_fp16))[name = string("input_131_cast_fp16")]; tensor normed_183_axes_0 = const()[name = string("normed_183_axes_0"), val = tensor([-1])]; fp16 var_4189_to_fp16 = const()[name = string("op_4189_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_183_cast_fp16 = layer_norm(axes = normed_183_axes_0, epsilon = var_4189_to_fp16, x = input_131_cast_fp16)[name = string("normed_183_cast_fp16")]; tensor var_4202_split_sizes_0 = const()[name = string("op_4202_split_sizes_0"), val = tensor([640, 640])]; int32 var_4202_axis_0 = const()[name = string("op_4202_axis_0"), val = int32(-1)]; tensor var_4202_cast_fp16_0, tensor var_4202_cast_fp16_1 = split(axis = var_4202_axis_0, split_sizes = var_4202_split_sizes_0, x = normed_183_cast_fp16)[name = string("op_4202_cast_fp16")]; tensor var_4206_to_fp16 = const()[name = string("op_4206_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265876480)))]; tensor out_79_cast_fp16 = mul(x = var_4202_cast_fp16_0, y = var_4206_to_fp16)[name = string("out_79_cast_fp16")]; tensor x_107_cast_fp16 = add(x = x_97_cast_fp16, y = out_79_cast_fp16)[name = string("x_107_cast_fp16")]; int32 var_4220 = const()[name = string("op_4220"), val = int32(-1)]; fp16 const_94_promoted_to_fp16 = const()[name = string("const_94_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4226_cast_fp16 = mul(x = x_107_cast_fp16, y = const_94_promoted_to_fp16)[name = string("op_4226_cast_fp16")]; bool input_133_interleave_0 = const()[name = string("input_133_interleave_0"), val = bool(false)]; tensor input_133_cast_fp16 = concat(axis = var_4220, interleave = input_133_interleave_0, values = (x_107_cast_fp16, var_4226_cast_fp16))[name = string("input_133_cast_fp16")]; tensor normed_187_axes_0 = const()[name = string("normed_187_axes_0"), val = tensor([-1])]; fp16 var_4218_to_fp16 = const()[name = string("op_4218_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_187_cast_fp16 = layer_norm(axes = normed_187_axes_0, epsilon = var_4218_to_fp16, x = input_133_cast_fp16)[name = string("normed_187_cast_fp16")]; tensor var_4231_split_sizes_0 = const()[name = string("op_4231_split_sizes_0"), val = tensor([640, 640])]; int32 var_4231_axis_0 = const()[name = string("op_4231_axis_0"), val = int32(-1)]; tensor var_4231_cast_fp16_0, tensor var_4231_cast_fp16_1 = split(axis = var_4231_axis_0, split_sizes = var_4231_split_sizes_0, x = normed_187_cast_fp16)[name = string("op_4231_cast_fp16")]; tensor var_4235_to_fp16 = const()[name = string("op_4235_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265877824)))]; tensor out_81_cast_fp16 = mul(x = var_4231_cast_fp16_0, y = var_4235_to_fp16)[name = string("out_81_cast_fp16")]; tensor var_4249 = const()[name = string("op_4249"), val = tensor([0, 2, 1])]; tensor input_135_axes_0 = const()[name = string("input_135_axes_0"), val = tensor([2])]; tensor var_4250 = transpose(perm = var_4249, x = out_81_cast_fp16)[name = string("transpose_91")]; tensor input_135 = expand_dims(axes = input_135_axes_0, x = var_4250)[name = string("input_135")]; string gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")]; tensor gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor([1, 1])]; tensor gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor([1, 1])]; int32 gate_25_groups_0 = const()[name = string("gate_25_groups_0"), val = int32(1)]; tensor gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, strides = gate_25_strides_0, weight = layers_6_mlp_gate_proj_weight_quantized, x = input_135)[name = string("gate_25")]; string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")]; tensor up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor([1, 1])]; tensor up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor([1, 1])]; int32 up_13_groups_0 = const()[name = string("up_13_groups_0"), val = int32(1)]; tensor up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = layers_6_mlp_up_proj_weight_quantized, x = input_135)[name = string("up_13")]; string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")]; tensor input_137 = mul(x = gate_27, y = up_13)[name = string("input_137")]; string var_4288_pad_type_0 = const()[name = string("op_4288_pad_type_0"), val = string("valid")]; tensor var_4288_strides_0 = const()[name = string("op_4288_strides_0"), val = tensor([1, 1])]; tensor var_4288_pad_0 = const()[name = string("op_4288_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4288_dilations_0 = const()[name = string("op_4288_dilations_0"), val = tensor([1, 1])]; int32 var_4288_groups_0 = const()[name = string("op_4288_groups_0"), val = int32(1)]; tensor var_4288 = conv(dilations = var_4288_dilations_0, groups = var_4288_groups_0, pad = var_4288_pad_0, pad_type = var_4288_pad_type_0, strides = var_4288_strides_0, weight = layers_6_mlp_down_proj_weight_quantized, x = input_137)[name = string("op_4288")]; tensor var_4290_axes_0 = const()[name = string("op_4290_axes_0"), val = tensor([2])]; tensor var_4290 = squeeze(axes = var_4290_axes_0, x = var_4288)[name = string("op_4290")]; tensor var_4294 = const()[name = string("op_4294"), val = tensor([0, 2, 1])]; int32 var_4301 = const()[name = string("op_4301"), val = int32(-1)]; fp16 const_96_promoted_to_fp16 = const()[name = string("const_96_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_111 = transpose(perm = var_4294, x = var_4290)[name = string("transpose_90")]; tensor var_4307_cast_fp16 = mul(x = x_111, y = const_96_promoted_to_fp16)[name = string("op_4307_cast_fp16")]; bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)]; tensor input_139_cast_fp16 = concat(axis = var_4301, interleave = input_139_interleave_0, values = (x_111, var_4307_cast_fp16))[name = string("input_139_cast_fp16")]; tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; fp16 var_4299_to_fp16 = const()[name = string("op_4299_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_4299_to_fp16, x = input_139_cast_fp16)[name = string("normed_193_cast_fp16")]; tensor var_4312_split_sizes_0 = const()[name = string("op_4312_split_sizes_0"), val = tensor([640, 640])]; int32 var_4312_axis_0 = const()[name = string("op_4312_axis_0"), val = int32(-1)]; tensor var_4312_cast_fp16_0, tensor var_4312_cast_fp16_1 = split(axis = var_4312_axis_0, split_sizes = var_4312_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_4312_cast_fp16")]; tensor var_4316_to_fp16 = const()[name = string("op_4316_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265879168)))]; tensor out_83_cast_fp16 = mul(x = var_4312_cast_fp16_0, y = var_4316_to_fp16)[name = string("out_83_cast_fp16")]; tensor x_113_cast_fp16 = add(x = x_107_cast_fp16, y = out_83_cast_fp16)[name = string("x_113_cast_fp16")]; int32 var_4330 = const()[name = string("op_4330"), val = int32(-1)]; fp16 const_98_promoted_to_fp16 = const()[name = string("const_98_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4336_cast_fp16 = mul(x = x_113_cast_fp16, y = const_98_promoted_to_fp16)[name = string("op_4336_cast_fp16")]; bool input_141_interleave_0 = const()[name = string("input_141_interleave_0"), val = bool(false)]; tensor input_141_cast_fp16 = concat(axis = var_4330, interleave = input_141_interleave_0, values = (x_113_cast_fp16, var_4336_cast_fp16))[name = string("input_141_cast_fp16")]; tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; fp16 var_4328_to_fp16 = const()[name = string("op_4328_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_4328_to_fp16, x = input_141_cast_fp16)[name = string("normed_197_cast_fp16")]; tensor var_4341_split_sizes_0 = const()[name = string("op_4341_split_sizes_0"), val = tensor([640, 640])]; int32 var_4341_axis_0 = const()[name = string("op_4341_axis_0"), val = int32(-1)]; tensor var_4341_cast_fp16_0, tensor var_4341_cast_fp16_1 = split(axis = var_4341_axis_0, split_sizes = var_4341_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_4341_cast_fp16")]; tensor var_4345_to_fp16 = const()[name = string("op_4345_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265880512)))]; tensor out_85_cast_fp16 = mul(x = var_4341_cast_fp16_0, y = var_4345_to_fp16)[name = string("out_85_cast_fp16")]; tensor var_4359 = const()[name = string("op_4359"), val = tensor([0, 2, 1])]; tensor input_143_axes_0 = const()[name = string("input_143_axes_0"), val = tensor([2])]; tensor var_4360 = transpose(perm = var_4359, x = out_85_cast_fp16)[name = string("transpose_89")]; tensor input_143 = expand_dims(axes = input_143_axes_0, x = var_4360)[name = string("input_143")]; string var_4373_pad_type_0 = const()[name = string("op_4373_pad_type_0"), val = string("valid")]; tensor var_4373_strides_0 = const()[name = string("op_4373_strides_0"), val = tensor([1, 1])]; tensor var_4373_pad_0 = const()[name = string("op_4373_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4373_dilations_0 = const()[name = string("op_4373_dilations_0"), val = tensor([1, 1])]; int32 var_4373_groups_0 = const()[name = string("op_4373_groups_0"), val = int32(1)]; tensor var_4373 = conv(dilations = var_4373_dilations_0, groups = var_4373_groups_0, pad = var_4373_pad_0, pad_type = var_4373_pad_type_0, strides = var_4373_strides_0, weight = layers_7_self_attn_q_proj_weight_quantized, x = input_143)[name = string("op_4373")]; tensor var_4378 = const()[name = string("op_4378"), val = tensor([1, 4, 256, 32])]; tensor var_4379 = reshape(shape = var_4378, x = var_4373)[name = string("op_4379")]; tensor var_4384 = const()[name = string("op_4384"), val = tensor([0, 1, 3, 2])]; int32 var_4397 = const()[name = string("op_4397"), val = int32(-1)]; fp16 const_100_promoted_to_fp16 = const()[name = string("const_100_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_43 = transpose(perm = var_4384, x = var_4379)[name = string("transpose_88")]; tensor var_4403_cast_fp16 = mul(x = q_43, y = const_100_promoted_to_fp16)[name = string("op_4403_cast_fp16")]; bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; tensor input_145_cast_fp16 = concat(axis = var_4397, interleave = input_145_interleave_0, values = (q_43, var_4403_cast_fp16))[name = string("input_145_cast_fp16")]; tensor normed_203_axes_0 = const()[name = string("normed_203_axes_0"), val = tensor([-1])]; fp16 var_4395_to_fp16 = const()[name = string("op_4395_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_203_cast_fp16 = layer_norm(axes = normed_203_axes_0, epsilon = var_4395_to_fp16, x = input_145_cast_fp16)[name = string("normed_203_cast_fp16")]; tensor var_4408_split_sizes_0 = const()[name = string("op_4408_split_sizes_0"), val = tensor([256, 256])]; int32 var_4408_axis_0 = const()[name = string("op_4408_axis_0"), val = int32(-1)]; tensor var_4408_cast_fp16_0, tensor var_4408_cast_fp16_1 = split(axis = var_4408_axis_0, split_sizes = var_4408_split_sizes_0, x = normed_203_cast_fp16)[name = string("op_4408_cast_fp16")]; tensor var_4412_to_fp16 = const()[name = string("op_4412_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265881856)))]; tensor out_87_cast_fp16 = mul(x = var_4408_cast_fp16_0, y = var_4412_to_fp16)[name = string("out_87_cast_fp16")]; string var_4425_pad_type_0 = const()[name = string("op_4425_pad_type_0"), val = string("valid")]; tensor var_4425_strides_0 = const()[name = string("op_4425_strides_0"), val = tensor([1, 1])]; tensor var_4425_pad_0 = const()[name = string("op_4425_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4425_dilations_0 = const()[name = string("op_4425_dilations_0"), val = tensor([1, 1])]; int32 var_4425_groups_0 = const()[name = string("op_4425_groups_0"), val = int32(1)]; tensor var_4425 = conv(dilations = var_4425_dilations_0, groups = var_4425_groups_0, pad = var_4425_pad_0, pad_type = var_4425_pad_type_0, strides = var_4425_strides_0, weight = layers_7_self_attn_k_proj_weight_quantized, x = input_143)[name = string("op_4425")]; tensor var_4430 = const()[name = string("op_4430"), val = tensor([1, 1, 256, 32])]; tensor var_4431 = reshape(shape = var_4430, x = var_4425)[name = string("op_4431")]; tensor var_4436 = const()[name = string("op_4436"), val = tensor([0, 1, 3, 2])]; int32 var_4449 = const()[name = string("op_4449"), val = int32(-1)]; fp16 const_102_promoted_to_fp16 = const()[name = string("const_102_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_43 = transpose(perm = var_4436, x = var_4431)[name = string("transpose_87")]; tensor var_4455_cast_fp16 = mul(x = k_43, y = const_102_promoted_to_fp16)[name = string("op_4455_cast_fp16")]; bool input_147_interleave_0 = const()[name = string("input_147_interleave_0"), val = bool(false)]; tensor input_147_cast_fp16 = concat(axis = var_4449, interleave = input_147_interleave_0, values = (k_43, var_4455_cast_fp16))[name = string("input_147_cast_fp16")]; tensor normed_207_axes_0 = const()[name = string("normed_207_axes_0"), val = tensor([-1])]; fp16 var_4447_to_fp16 = const()[name = string("op_4447_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_207_cast_fp16 = layer_norm(axes = normed_207_axes_0, epsilon = var_4447_to_fp16, x = input_147_cast_fp16)[name = string("normed_207_cast_fp16")]; tensor var_4460_split_sizes_0 = const()[name = string("op_4460_split_sizes_0"), val = tensor([256, 256])]; int32 var_4460_axis_0 = const()[name = string("op_4460_axis_0"), val = int32(-1)]; tensor var_4460_cast_fp16_0, tensor var_4460_cast_fp16_1 = split(axis = var_4460_axis_0, split_sizes = var_4460_split_sizes_0, x = normed_207_cast_fp16)[name = string("op_4460_cast_fp16")]; tensor var_4464_to_fp16 = const()[name = string("op_4464_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265882432)))]; tensor out_89_cast_fp16 = mul(x = var_4460_cast_fp16_0, y = var_4464_to_fp16)[name = string("out_89_cast_fp16")]; string var_4477_pad_type_0 = const()[name = string("op_4477_pad_type_0"), val = string("valid")]; tensor var_4477_strides_0 = const()[name = string("op_4477_strides_0"), val = tensor([1, 1])]; tensor var_4477_pad_0 = const()[name = string("op_4477_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4477_dilations_0 = const()[name = string("op_4477_dilations_0"), val = tensor([1, 1])]; int32 var_4477_groups_0 = const()[name = string("op_4477_groups_0"), val = int32(1)]; tensor var_4477 = conv(dilations = var_4477_dilations_0, groups = var_4477_groups_0, pad = var_4477_pad_0, pad_type = var_4477_pad_type_0, strides = var_4477_strides_0, weight = layers_7_self_attn_v_proj_weight_quantized, x = input_143)[name = string("op_4477")]; tensor var_4482 = const()[name = string("op_4482"), val = tensor([1, 1, 256, 32])]; tensor var_4483 = reshape(shape = var_4482, x = var_4477)[name = string("op_4483")]; tensor var_4490 = mul(x = out_87_cast_fp16, y = cos_1)[name = string("op_4490")]; tensor var_4491_split_sizes_0 = const()[name = string("op_4491_split_sizes_0"), val = tensor([128, 128])]; int32 var_4491_axis_0 = const()[name = string("op_4491_axis_0"), val = int32(-1)]; tensor var_4491_0, tensor var_4491_1 = split(axis = var_4491_axis_0, split_sizes = var_4491_split_sizes_0, x = out_87_cast_fp16)[name = string("op_4491")]; fp16 const_104_promoted = const()[name = string("const_104_promoted"), val = fp16(-0x1p+0)]; tensor var_4493 = mul(x = var_4491_1, y = const_104_promoted)[name = string("op_4493")]; int32 var_4495 = const()[name = string("op_4495"), val = int32(-1)]; bool var_4496_interleave_0 = const()[name = string("op_4496_interleave_0"), val = bool(false)]; tensor var_4496 = concat(axis = var_4495, interleave = var_4496_interleave_0, values = (var_4493, var_4491_0))[name = string("op_4496")]; tensor var_4497 = mul(x = var_4496, y = sin_1)[name = string("op_4497")]; tensor q_47 = add(x = var_4490, y = var_4497)[name = string("q_47")]; tensor var_4500 = mul(x = out_89_cast_fp16, y = cos_1)[name = string("op_4500")]; tensor var_4501_split_sizes_0 = const()[name = string("op_4501_split_sizes_0"), val = tensor([128, 128])]; int32 var_4501_axis_0 = const()[name = string("op_4501_axis_0"), val = int32(-1)]; tensor var_4501_0, tensor var_4501_1 = split(axis = var_4501_axis_0, split_sizes = var_4501_split_sizes_0, x = out_89_cast_fp16)[name = string("op_4501")]; fp16 const_105_promoted = const()[name = string("const_105_promoted"), val = fp16(-0x1p+0)]; tensor var_4503 = mul(x = var_4501_1, y = const_105_promoted)[name = string("op_4503")]; int32 var_4505 = const()[name = string("op_4505"), val = int32(-1)]; bool var_4506_interleave_0 = const()[name = string("op_4506_interleave_0"), val = bool(false)]; tensor var_4506 = concat(axis = var_4505, interleave = var_4506_interleave_0, values = (var_4503, var_4501_0))[name = string("op_4506")]; tensor var_4507 = mul(x = var_4506, y = sin_1)[name = string("op_4507")]; tensor k_47 = add(x = var_4500, y = var_4507)[name = string("k_47")]; tensor var_4512_begin_0 = const()[name = string("op_4512_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_4512_end_0 = const()[name = string("op_4512_end_0"), val = tensor([8, 1, 2048, 256])]; tensor var_4512_end_mask_0 = const()[name = string("op_4512_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4512_squeeze_mask_0 = const()[name = string("op_4512_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_4512_cast_fp16 = slice_by_index(begin = var_4512_begin_0, end = var_4512_end_0, end_mask = var_4512_end_mask_0, squeeze_mask = var_4512_squeeze_mask_0, x = coreml_update_state_49)[name = string("op_4512_cast_fp16")]; tensor K_cache_15_axes_0 = const()[name = string("K_cache_15_axes_0"), val = tensor([0])]; tensor K_cache_15_cast_fp16 = expand_dims(axes = K_cache_15_axes_0, x = var_4512_cast_fp16)[name = string("K_cache_15_cast_fp16")]; tensor var_4517_begin_0 = const()[name = string("op_4517_begin_0"), val = tensor([25, 0, 0, 0])]; tensor var_4517_end_0 = const()[name = string("op_4517_end_0"), val = tensor([26, 1, 2048, 256])]; tensor var_4517_end_mask_0 = const()[name = string("op_4517_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4517_squeeze_mask_0 = const()[name = string("op_4517_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_4517_cast_fp16 = slice_by_index(begin = var_4517_begin_0, end = var_4517_end_0, end_mask = var_4517_end_mask_0, squeeze_mask = var_4517_squeeze_mask_0, x = coreml_update_state_49)[name = string("op_4517_cast_fp16")]; tensor V_cache_15_axes_0 = const()[name = string("V_cache_15_axes_0"), val = tensor([0])]; tensor V_cache_15_cast_fp16 = expand_dims(axes = V_cache_15_axes_0, x = var_4517_cast_fp16)[name = string("V_cache_15_cast_fp16")]; bool k_increment_15_transpose_x_0 = const()[name = string("k_increment_15_transpose_x_0"), val = bool(false)]; bool k_increment_15_transpose_y_0 = const()[name = string("k_increment_15_transpose_y_0"), val = bool(false)]; tensor k_increment_15 = matmul(transpose_x = k_increment_15_transpose_x_0, transpose_y = k_increment_15_transpose_y_0, x = update_mask, y = k_47)[name = string("k_increment_15")]; bool v_increment_15_transpose_x_1 = const()[name = string("v_increment_15_transpose_x_1"), val = bool(false)]; bool v_increment_15_transpose_y_1 = const()[name = string("v_increment_15_transpose_y_1"), val = bool(true)]; tensor v_increment_15 = matmul(transpose_x = v_increment_15_transpose_x_1, transpose_y = v_increment_15_transpose_y_1, x = update_mask, y = var_4483)[name = string("v_increment_15")]; tensor var_4535_cast_fp16 = mul(x = K_cache_15_cast_fp16, y = var_1125_cast_fp16)[name = string("op_4535_cast_fp16")]; tensor K_new_15_cast_fp16 = add(x = var_4535_cast_fp16, y = k_increment_15)[name = string("K_new_15_cast_fp16")]; tensor var_4541_cast_fp16 = mul(x = V_cache_15_cast_fp16, y = var_1125_cast_fp16)[name = string("op_4541_cast_fp16")]; tensor V_new_15_cast_fp16 = add(x = var_4541_cast_fp16, y = v_increment_15)[name = string("V_new_15_cast_fp16")]; tensor var_4545_axes_0 = const()[name = string("op_4545_axes_0"), val = tensor([0])]; tensor var_4545_cast_fp16 = squeeze(axes = var_4545_axes_0, x = K_new_15_cast_fp16)[name = string("op_4545_cast_fp16")]; tensor concat_28 = const()[name = string("concat_28"), val = tensor([7, 0, 0, 0])]; tensor concat_29 = const()[name = string("concat_29"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_28, begin_mask = kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_29, end_mask = kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_15_stride_0, update = var_4545_cast_fp16, x = coreml_update_state_49)[name = string("kv_cache_0_internal_tensor_assign_15_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_15_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_50_write_state")]; tensor coreml_update_state_50 = read_state(input = kv_cache_0)[name = string("coreml_update_state_50")]; tensor var_4552_axes_0 = const()[name = string("op_4552_axes_0"), val = tensor([0])]; tensor var_4552_cast_fp16 = squeeze(axes = var_4552_axes_0, x = V_new_15_cast_fp16)[name = string("op_4552_cast_fp16")]; tensor concat_30 = const()[name = string("concat_30"), val = tensor([25, 0, 0, 0])]; tensor concat_31 = const()[name = string("concat_31"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_30, begin_mask = kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_31, end_mask = kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_16_stride_0, update = var_4552_cast_fp16, x = coreml_update_state_50)[name = string("kv_cache_0_internal_tensor_assign_16_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_16_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_51_write_state")]; tensor coreml_update_state_51 = read_state(input = kv_cache_0)[name = string("coreml_update_state_51")]; tensor hidden_states_57_axes_0 = const()[name = string("hidden_states_57_axes_0"), val = tensor([2])]; tensor hidden_states_57_cast_fp16 = expand_dims(axes = hidden_states_57_axes_0, x = K_new_15_cast_fp16)[name = string("hidden_states_57_cast_fp16")]; tensor var_4565 = const()[name = string("op_4565"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_59_cast_fp16 = tile(reps = var_4565, x = hidden_states_57_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; tensor var_4571 = const()[name = string("op_4571"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_15_cast_fp16 = reshape(shape = var_4571, x = hidden_states_59_cast_fp16)[name = string("K_expanded_15_cast_fp16")]; tensor hidden_states_61_axes_0 = const()[name = string("hidden_states_61_axes_0"), val = tensor([2])]; tensor hidden_states_61_cast_fp16 = expand_dims(axes = hidden_states_61_axes_0, x = V_new_15_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; tensor var_4580 = const()[name = string("op_4580"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_63_cast_fp16 = tile(reps = var_4580, x = hidden_states_61_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; tensor var_4586 = const()[name = string("op_4586"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_15_cast_fp16 = reshape(shape = var_4586, x = hidden_states_63_cast_fp16)[name = string("V_expanded_15_cast_fp16")]; bool var_4601_transpose_x_1 = const()[name = string("op_4601_transpose_x_1"), val = bool(false)]; bool var_4601_transpose_y_1 = const()[name = string("op_4601_transpose_y_1"), val = bool(true)]; tensor var_4601_cast_fp16 = matmul(transpose_x = var_4601_transpose_x_1, transpose_y = var_4601_transpose_y_1, x = q_47, y = K_expanded_15_cast_fp16)[name = string("op_4601_cast_fp16")]; fp16 var_4602_to_fp16 = const()[name = string("op_4602_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_43_cast_fp16 = mul(x = var_4601_cast_fp16, y = var_4602_to_fp16)[name = string("attn_weights_43_cast_fp16")]; tensor attn_weights_45_cast_fp16 = add(x = attn_weights_43_cast_fp16, y = causal_mask)[name = string("attn_weights_45_cast_fp16")]; int32 var_4611 = const()[name = string("op_4611"), val = int32(-1)]; tensor var_4613_cast_fp16 = softmax(axis = var_4611, x = attn_weights_45_cast_fp16)[name = string("op_4613_cast_fp16")]; bool var_4629_transpose_x_0 = const()[name = string("op_4629_transpose_x_0"), val = bool(false)]; bool var_4629_transpose_y_0 = const()[name = string("op_4629_transpose_y_0"), val = bool(false)]; tensor var_4629_cast_fp16 = matmul(transpose_x = var_4629_transpose_x_0, transpose_y = var_4629_transpose_y_0, x = var_4613_cast_fp16, y = V_expanded_15_cast_fp16)[name = string("op_4629_cast_fp16")]; tensor var_4639 = const()[name = string("op_4639"), val = tensor([0, 2, 1, 3])]; tensor var_4646 = const()[name = string("op_4646"), val = tensor([1, 32, 1024])]; tensor var_4640 = transpose(perm = var_4639, x = var_4629_cast_fp16)[name = string("transpose_86")]; tensor attn_output_45 = reshape(shape = var_4646, x = var_4640)[name = string("attn_output_45")]; tensor var_4651 = const()[name = string("op_4651"), val = tensor([0, 2, 1])]; tensor squeeze_7_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265883008))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266538432))))[name = string("squeeze_7_quantized")]; string var_4667_pad_type_0 = const()[name = string("op_4667_pad_type_0"), val = string("valid")]; int32 var_4667_groups_0 = const()[name = string("op_4667_groups_0"), val = int32(1)]; tensor var_4667_strides_0 = const()[name = string("op_4667_strides_0"), val = tensor([1])]; tensor var_4667_pad_0 = const()[name = string("op_4667_pad_0"), val = tensor([0, 0])]; tensor var_4667_dilations_0 = const()[name = string("op_4667_dilations_0"), val = tensor([1])]; tensor var_4652 = transpose(perm = var_4651, x = attn_output_45)[name = string("transpose_85")]; tensor var_4667 = conv(dilations = var_4667_dilations_0, groups = var_4667_groups_0, pad = var_4667_pad_0, pad_type = var_4667_pad_type_0, strides = var_4667_strides_0, weight = squeeze_7_quantized, x = var_4652)[name = string("op_4667")]; tensor var_4671 = const()[name = string("op_4671"), val = tensor([0, 2, 1])]; int32 var_4678 = const()[name = string("op_4678"), val = int32(-1)]; fp16 const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_121 = transpose(perm = var_4671, x = var_4667)[name = string("transpose_84")]; tensor var_4684_cast_fp16 = mul(x = x_121, y = const_106_promoted_to_fp16)[name = string("op_4684_cast_fp16")]; bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)]; tensor input_151_cast_fp16 = concat(axis = var_4678, interleave = input_151_interleave_0, values = (x_121, var_4684_cast_fp16))[name = string("input_151_cast_fp16")]; tensor normed_211_axes_0 = const()[name = string("normed_211_axes_0"), val = tensor([-1])]; fp16 var_4676_to_fp16 = const()[name = string("op_4676_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_211_cast_fp16 = layer_norm(axes = normed_211_axes_0, epsilon = var_4676_to_fp16, x = input_151_cast_fp16)[name = string("normed_211_cast_fp16")]; tensor var_4689_split_sizes_0 = const()[name = string("op_4689_split_sizes_0"), val = tensor([640, 640])]; int32 var_4689_axis_0 = const()[name = string("op_4689_axis_0"), val = int32(-1)]; tensor var_4689_cast_fp16_0, tensor var_4689_cast_fp16_1 = split(axis = var_4689_axis_0, split_sizes = var_4689_split_sizes_0, x = normed_211_cast_fp16)[name = string("op_4689_cast_fp16")]; tensor var_4693_to_fp16 = const()[name = string("op_4693_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266539776)))]; tensor out_91_cast_fp16 = mul(x = var_4689_cast_fp16_0, y = var_4693_to_fp16)[name = string("out_91_cast_fp16")]; tensor x_123_cast_fp16 = add(x = x_113_cast_fp16, y = out_91_cast_fp16)[name = string("x_123_cast_fp16")]; int32 var_4707 = const()[name = string("op_4707"), val = int32(-1)]; fp16 const_108_promoted_to_fp16 = const()[name = string("const_108_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4713_cast_fp16 = mul(x = x_123_cast_fp16, y = const_108_promoted_to_fp16)[name = string("op_4713_cast_fp16")]; bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; tensor input_153_cast_fp16 = concat(axis = var_4707, interleave = input_153_interleave_0, values = (x_123_cast_fp16, var_4713_cast_fp16))[name = string("input_153_cast_fp16")]; tensor normed_215_axes_0 = const()[name = string("normed_215_axes_0"), val = tensor([-1])]; fp16 var_4705_to_fp16 = const()[name = string("op_4705_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_215_cast_fp16 = layer_norm(axes = normed_215_axes_0, epsilon = var_4705_to_fp16, x = input_153_cast_fp16)[name = string("normed_215_cast_fp16")]; tensor var_4718_split_sizes_0 = const()[name = string("op_4718_split_sizes_0"), val = tensor([640, 640])]; int32 var_4718_axis_0 = const()[name = string("op_4718_axis_0"), val = int32(-1)]; tensor var_4718_cast_fp16_0, tensor var_4718_cast_fp16_1 = split(axis = var_4718_axis_0, split_sizes = var_4718_split_sizes_0, x = normed_215_cast_fp16)[name = string("op_4718_cast_fp16")]; tensor var_4722_to_fp16 = const()[name = string("op_4722_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266541120)))]; tensor out_93_cast_fp16 = mul(x = var_4718_cast_fp16_0, y = var_4722_to_fp16)[name = string("out_93_cast_fp16")]; tensor var_4736 = const()[name = string("op_4736"), val = tensor([0, 2, 1])]; tensor input_155_axes_0 = const()[name = string("input_155_axes_0"), val = tensor([2])]; tensor var_4737 = transpose(perm = var_4736, x = out_93_cast_fp16)[name = string("transpose_83")]; tensor input_155 = expand_dims(axes = input_155_axes_0, x = var_4737)[name = string("input_155")]; string gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")]; tensor gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor([1, 1])]; tensor gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val = tensor([1, 1])]; int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)]; tensor gate_29 = conv(dilations = gate_29_dilations_0, groups = gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = layers_7_mlp_gate_proj_weight_quantized, x = input_155)[name = string("gate_29")]; string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")]; tensor up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor([1, 1])]; tensor up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor([1, 1])]; int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)]; tensor up_15 = conv(dilations = up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = layers_7_mlp_up_proj_weight_quantized, x = input_155)[name = string("up_15")]; string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")]; tensor input_157 = mul(x = gate_31, y = up_15)[name = string("input_157")]; string var_4775_pad_type_0 = const()[name = string("op_4775_pad_type_0"), val = string("valid")]; tensor var_4775_strides_0 = const()[name = string("op_4775_strides_0"), val = tensor([1, 1])]; tensor var_4775_pad_0 = const()[name = string("op_4775_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4775_dilations_0 = const()[name = string("op_4775_dilations_0"), val = tensor([1, 1])]; int32 var_4775_groups_0 = const()[name = string("op_4775_groups_0"), val = int32(1)]; tensor var_4775 = conv(dilations = var_4775_dilations_0, groups = var_4775_groups_0, pad = var_4775_pad_0, pad_type = var_4775_pad_type_0, strides = var_4775_strides_0, weight = layers_7_mlp_down_proj_weight_quantized, x = input_157)[name = string("op_4775")]; tensor var_4777_axes_0 = const()[name = string("op_4777_axes_0"), val = tensor([2])]; tensor var_4777 = squeeze(axes = var_4777_axes_0, x = var_4775)[name = string("op_4777")]; tensor var_4781 = const()[name = string("op_4781"), val = tensor([0, 2, 1])]; int32 var_4788 = const()[name = string("op_4788"), val = int32(-1)]; fp16 const_110_promoted_to_fp16 = const()[name = string("const_110_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_127 = transpose(perm = var_4781, x = var_4777)[name = string("transpose_82")]; tensor var_4794_cast_fp16 = mul(x = x_127, y = const_110_promoted_to_fp16)[name = string("op_4794_cast_fp16")]; bool input_159_interleave_0 = const()[name = string("input_159_interleave_0"), val = bool(false)]; tensor input_159_cast_fp16 = concat(axis = var_4788, interleave = input_159_interleave_0, values = (x_127, var_4794_cast_fp16))[name = string("input_159_cast_fp16")]; tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; fp16 var_4786_to_fp16 = const()[name = string("op_4786_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_4786_to_fp16, x = input_159_cast_fp16)[name = string("normed_221_cast_fp16")]; tensor var_4799_split_sizes_0 = const()[name = string("op_4799_split_sizes_0"), val = tensor([640, 640])]; int32 var_4799_axis_0 = const()[name = string("op_4799_axis_0"), val = int32(-1)]; tensor var_4799_cast_fp16_0, tensor var_4799_cast_fp16_1 = split(axis = var_4799_axis_0, split_sizes = var_4799_split_sizes_0, x = normed_221_cast_fp16)[name = string("op_4799_cast_fp16")]; tensor var_4803_to_fp16 = const()[name = string("op_4803_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266542464)))]; tensor out_95_cast_fp16 = mul(x = var_4799_cast_fp16_0, y = var_4803_to_fp16)[name = string("out_95_cast_fp16")]; tensor x_129_cast_fp16 = add(x = x_123_cast_fp16, y = out_95_cast_fp16)[name = string("x_129_cast_fp16")]; int32 var_4817 = const()[name = string("op_4817"), val = int32(-1)]; fp16 const_112_promoted_to_fp16 = const()[name = string("const_112_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4823_cast_fp16 = mul(x = x_129_cast_fp16, y = const_112_promoted_to_fp16)[name = string("op_4823_cast_fp16")]; bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; tensor input_161_cast_fp16 = concat(axis = var_4817, interleave = input_161_interleave_0, values = (x_129_cast_fp16, var_4823_cast_fp16))[name = string("input_161_cast_fp16")]; tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; fp16 var_4815_to_fp16 = const()[name = string("op_4815_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_4815_to_fp16, x = input_161_cast_fp16)[name = string("normed_225_cast_fp16")]; tensor var_4828_split_sizes_0 = const()[name = string("op_4828_split_sizes_0"), val = tensor([640, 640])]; int32 var_4828_axis_0 = const()[name = string("op_4828_axis_0"), val = int32(-1)]; tensor var_4828_cast_fp16_0, tensor var_4828_cast_fp16_1 = split(axis = var_4828_axis_0, split_sizes = var_4828_split_sizes_0, x = normed_225_cast_fp16)[name = string("op_4828_cast_fp16")]; tensor var_4832_to_fp16 = const()[name = string("op_4832_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266543808)))]; tensor out_97_cast_fp16 = mul(x = var_4828_cast_fp16_0, y = var_4832_to_fp16)[name = string("out_97_cast_fp16")]; tensor var_4846 = const()[name = string("op_4846"), val = tensor([0, 2, 1])]; tensor input_163_axes_0 = const()[name = string("input_163_axes_0"), val = tensor([2])]; tensor var_4847 = transpose(perm = var_4846, x = out_97_cast_fp16)[name = string("transpose_81")]; tensor input_163 = expand_dims(axes = input_163_axes_0, x = var_4847)[name = string("input_163")]; string var_4860_pad_type_0 = const()[name = string("op_4860_pad_type_0"), val = string("valid")]; tensor var_4860_strides_0 = const()[name = string("op_4860_strides_0"), val = tensor([1, 1])]; tensor var_4860_pad_0 = const()[name = string("op_4860_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4860_dilations_0 = const()[name = string("op_4860_dilations_0"), val = tensor([1, 1])]; int32 var_4860_groups_0 = const()[name = string("op_4860_groups_0"), val = int32(1)]; tensor var_4860 = conv(dilations = var_4860_dilations_0, groups = var_4860_groups_0, pad = var_4860_pad_0, pad_type = var_4860_pad_type_0, strides = var_4860_strides_0, weight = layers_8_self_attn_q_proj_weight_quantized, x = input_163)[name = string("op_4860")]; tensor var_4865 = const()[name = string("op_4865"), val = tensor([1, 4, 256, 32])]; tensor var_4866 = reshape(shape = var_4865, x = var_4860)[name = string("op_4866")]; tensor var_4871 = const()[name = string("op_4871"), val = tensor([0, 1, 3, 2])]; int32 var_4884 = const()[name = string("op_4884"), val = int32(-1)]; fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_49 = transpose(perm = var_4871, x = var_4866)[name = string("transpose_80")]; tensor var_4890_cast_fp16 = mul(x = q_49, y = const_114_promoted_to_fp16)[name = string("op_4890_cast_fp16")]; bool input_165_interleave_0 = const()[name = string("input_165_interleave_0"), val = bool(false)]; tensor input_165_cast_fp16 = concat(axis = var_4884, interleave = input_165_interleave_0, values = (q_49, var_4890_cast_fp16))[name = string("input_165_cast_fp16")]; tensor normed_231_axes_0 = const()[name = string("normed_231_axes_0"), val = tensor([-1])]; fp16 var_4882_to_fp16 = const()[name = string("op_4882_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_231_cast_fp16 = layer_norm(axes = normed_231_axes_0, epsilon = var_4882_to_fp16, x = input_165_cast_fp16)[name = string("normed_231_cast_fp16")]; tensor var_4895_split_sizes_0 = const()[name = string("op_4895_split_sizes_0"), val = tensor([256, 256])]; int32 var_4895_axis_0 = const()[name = string("op_4895_axis_0"), val = int32(-1)]; tensor var_4895_cast_fp16_0, tensor var_4895_cast_fp16_1 = split(axis = var_4895_axis_0, split_sizes = var_4895_split_sizes_0, x = normed_231_cast_fp16)[name = string("op_4895_cast_fp16")]; tensor var_4899_to_fp16 = const()[name = string("op_4899_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266545152)))]; tensor out_99_cast_fp16 = mul(x = var_4895_cast_fp16_0, y = var_4899_to_fp16)[name = string("out_99_cast_fp16")]; string var_4912_pad_type_0 = const()[name = string("op_4912_pad_type_0"), val = string("valid")]; tensor var_4912_strides_0 = const()[name = string("op_4912_strides_0"), val = tensor([1, 1])]; tensor var_4912_pad_0 = const()[name = string("op_4912_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4912_dilations_0 = const()[name = string("op_4912_dilations_0"), val = tensor([1, 1])]; int32 var_4912_groups_0 = const()[name = string("op_4912_groups_0"), val = int32(1)]; tensor var_4912 = conv(dilations = var_4912_dilations_0, groups = var_4912_groups_0, pad = var_4912_pad_0, pad_type = var_4912_pad_type_0, strides = var_4912_strides_0, weight = layers_8_self_attn_k_proj_weight_quantized, x = input_163)[name = string("op_4912")]; tensor var_4917 = const()[name = string("op_4917"), val = tensor([1, 1, 256, 32])]; tensor var_4918 = reshape(shape = var_4917, x = var_4912)[name = string("op_4918")]; tensor var_4923 = const()[name = string("op_4923"), val = tensor([0, 1, 3, 2])]; int32 var_4936 = const()[name = string("op_4936"), val = int32(-1)]; fp16 const_116_promoted_to_fp16 = const()[name = string("const_116_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_49 = transpose(perm = var_4923, x = var_4918)[name = string("transpose_79")]; tensor var_4942_cast_fp16 = mul(x = k_49, y = const_116_promoted_to_fp16)[name = string("op_4942_cast_fp16")]; bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; tensor input_167_cast_fp16 = concat(axis = var_4936, interleave = input_167_interleave_0, values = (k_49, var_4942_cast_fp16))[name = string("input_167_cast_fp16")]; tensor normed_235_axes_0 = const()[name = string("normed_235_axes_0"), val = tensor([-1])]; fp16 var_4934_to_fp16 = const()[name = string("op_4934_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_235_cast_fp16 = layer_norm(axes = normed_235_axes_0, epsilon = var_4934_to_fp16, x = input_167_cast_fp16)[name = string("normed_235_cast_fp16")]; tensor var_4947_split_sizes_0 = const()[name = string("op_4947_split_sizes_0"), val = tensor([256, 256])]; int32 var_4947_axis_0 = const()[name = string("op_4947_axis_0"), val = int32(-1)]; tensor var_4947_cast_fp16_0, tensor var_4947_cast_fp16_1 = split(axis = var_4947_axis_0, split_sizes = var_4947_split_sizes_0, x = normed_235_cast_fp16)[name = string("op_4947_cast_fp16")]; tensor var_4951_to_fp16 = const()[name = string("op_4951_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266545728)))]; tensor out_101_cast_fp16 = mul(x = var_4947_cast_fp16_0, y = var_4951_to_fp16)[name = string("out_101_cast_fp16")]; string var_4964_pad_type_0 = const()[name = string("op_4964_pad_type_0"), val = string("valid")]; tensor var_4964_strides_0 = const()[name = string("op_4964_strides_0"), val = tensor([1, 1])]; tensor var_4964_pad_0 = const()[name = string("op_4964_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4964_dilations_0 = const()[name = string("op_4964_dilations_0"), val = tensor([1, 1])]; int32 var_4964_groups_0 = const()[name = string("op_4964_groups_0"), val = int32(1)]; tensor var_4964 = conv(dilations = var_4964_dilations_0, groups = var_4964_groups_0, pad = var_4964_pad_0, pad_type = var_4964_pad_type_0, strides = var_4964_strides_0, weight = layers_8_self_attn_v_proj_weight_quantized, x = input_163)[name = string("op_4964")]; tensor var_4969 = const()[name = string("op_4969"), val = tensor([1, 1, 256, 32])]; tensor var_4970 = reshape(shape = var_4969, x = var_4964)[name = string("op_4970")]; tensor var_4977 = mul(x = out_99_cast_fp16, y = cos_1)[name = string("op_4977")]; tensor var_4978_split_sizes_0 = const()[name = string("op_4978_split_sizes_0"), val = tensor([128, 128])]; int32 var_4978_axis_0 = const()[name = string("op_4978_axis_0"), val = int32(-1)]; tensor var_4978_0, tensor var_4978_1 = split(axis = var_4978_axis_0, split_sizes = var_4978_split_sizes_0, x = out_99_cast_fp16)[name = string("op_4978")]; fp16 const_118_promoted = const()[name = string("const_118_promoted"), val = fp16(-0x1p+0)]; tensor var_4980 = mul(x = var_4978_1, y = const_118_promoted)[name = string("op_4980")]; int32 var_4982 = const()[name = string("op_4982"), val = int32(-1)]; bool var_4983_interleave_0 = const()[name = string("op_4983_interleave_0"), val = bool(false)]; tensor var_4983 = concat(axis = var_4982, interleave = var_4983_interleave_0, values = (var_4980, var_4978_0))[name = string("op_4983")]; tensor var_4984 = mul(x = var_4983, y = sin_1)[name = string("op_4984")]; tensor q_53 = add(x = var_4977, y = var_4984)[name = string("q_53")]; tensor var_4987 = mul(x = out_101_cast_fp16, y = cos_1)[name = string("op_4987")]; tensor var_4988_split_sizes_0 = const()[name = string("op_4988_split_sizes_0"), val = tensor([128, 128])]; int32 var_4988_axis_0 = const()[name = string("op_4988_axis_0"), val = int32(-1)]; tensor var_4988_0, tensor var_4988_1 = split(axis = var_4988_axis_0, split_sizes = var_4988_split_sizes_0, x = out_101_cast_fp16)[name = string("op_4988")]; fp16 const_119_promoted = const()[name = string("const_119_promoted"), val = fp16(-0x1p+0)]; tensor var_4990 = mul(x = var_4988_1, y = const_119_promoted)[name = string("op_4990")]; int32 var_4992 = const()[name = string("op_4992"), val = int32(-1)]; bool var_4993_interleave_0 = const()[name = string("op_4993_interleave_0"), val = bool(false)]; tensor var_4993 = concat(axis = var_4992, interleave = var_4993_interleave_0, values = (var_4990, var_4988_0))[name = string("op_4993")]; tensor var_4994 = mul(x = var_4993, y = sin_1)[name = string("op_4994")]; tensor k_53 = add(x = var_4987, y = var_4994)[name = string("k_53")]; tensor var_4999_begin_0 = const()[name = string("op_4999_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_4999_end_0 = const()[name = string("op_4999_end_0"), val = tensor([9, 1, 2048, 256])]; tensor var_4999_end_mask_0 = const()[name = string("op_4999_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4999_squeeze_mask_0 = const()[name = string("op_4999_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_4999_cast_fp16 = slice_by_index(begin = var_4999_begin_0, end = var_4999_end_0, end_mask = var_4999_end_mask_0, squeeze_mask = var_4999_squeeze_mask_0, x = coreml_update_state_51)[name = string("op_4999_cast_fp16")]; tensor K_cache_17_axes_0 = const()[name = string("K_cache_17_axes_0"), val = tensor([0])]; tensor K_cache_17_cast_fp16 = expand_dims(axes = K_cache_17_axes_0, x = var_4999_cast_fp16)[name = string("K_cache_17_cast_fp16")]; tensor var_5004_begin_0 = const()[name = string("op_5004_begin_0"), val = tensor([26, 0, 0, 0])]; tensor var_5004_end_0 = const()[name = string("op_5004_end_0"), val = tensor([27, 1, 2048, 256])]; tensor var_5004_end_mask_0 = const()[name = string("op_5004_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5004_squeeze_mask_0 = const()[name = string("op_5004_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_5004_cast_fp16 = slice_by_index(begin = var_5004_begin_0, end = var_5004_end_0, end_mask = var_5004_end_mask_0, squeeze_mask = var_5004_squeeze_mask_0, x = coreml_update_state_51)[name = string("op_5004_cast_fp16")]; tensor V_cache_17_axes_0 = const()[name = string("V_cache_17_axes_0"), val = tensor([0])]; tensor V_cache_17_cast_fp16 = expand_dims(axes = V_cache_17_axes_0, x = var_5004_cast_fp16)[name = string("V_cache_17_cast_fp16")]; bool k_increment_17_transpose_x_0 = const()[name = string("k_increment_17_transpose_x_0"), val = bool(false)]; bool k_increment_17_transpose_y_0 = const()[name = string("k_increment_17_transpose_y_0"), val = bool(false)]; tensor k_increment_17 = matmul(transpose_x = k_increment_17_transpose_x_0, transpose_y = k_increment_17_transpose_y_0, x = update_mask, y = k_53)[name = string("k_increment_17")]; bool v_increment_17_transpose_x_1 = const()[name = string("v_increment_17_transpose_x_1"), val = bool(false)]; bool v_increment_17_transpose_y_1 = const()[name = string("v_increment_17_transpose_y_1"), val = bool(true)]; tensor v_increment_17 = matmul(transpose_x = v_increment_17_transpose_x_1, transpose_y = v_increment_17_transpose_y_1, x = update_mask, y = var_4970)[name = string("v_increment_17")]; tensor var_5022_cast_fp16 = mul(x = K_cache_17_cast_fp16, y = var_1125_cast_fp16)[name = string("op_5022_cast_fp16")]; tensor K_new_17_cast_fp16 = add(x = var_5022_cast_fp16, y = k_increment_17)[name = string("K_new_17_cast_fp16")]; tensor var_5028_cast_fp16 = mul(x = V_cache_17_cast_fp16, y = var_1125_cast_fp16)[name = string("op_5028_cast_fp16")]; tensor V_new_17_cast_fp16 = add(x = var_5028_cast_fp16, y = v_increment_17)[name = string("V_new_17_cast_fp16")]; tensor var_5032_axes_0 = const()[name = string("op_5032_axes_0"), val = tensor([0])]; tensor var_5032_cast_fp16 = squeeze(axes = var_5032_axes_0, x = K_new_17_cast_fp16)[name = string("op_5032_cast_fp16")]; tensor concat_32 = const()[name = string("concat_32"), val = tensor([8, 0, 0, 0])]; tensor concat_33 = const()[name = string("concat_33"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_32, begin_mask = kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_33, end_mask = kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_17_stride_0, update = var_5032_cast_fp16, x = coreml_update_state_51)[name = string("kv_cache_0_internal_tensor_assign_17_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_17_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_52_write_state")]; tensor coreml_update_state_52 = read_state(input = kv_cache_0)[name = string("coreml_update_state_52")]; tensor var_5039_axes_0 = const()[name = string("op_5039_axes_0"), val = tensor([0])]; tensor var_5039_cast_fp16 = squeeze(axes = var_5039_axes_0, x = V_new_17_cast_fp16)[name = string("op_5039_cast_fp16")]; tensor concat_34 = const()[name = string("concat_34"), val = tensor([26, 0, 0, 0])]; tensor concat_35 = const()[name = string("concat_35"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_34, begin_mask = kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_35, end_mask = kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_18_stride_0, update = var_5039_cast_fp16, x = coreml_update_state_52)[name = string("kv_cache_0_internal_tensor_assign_18_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_18_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_53_write_state")]; tensor coreml_update_state_53 = read_state(input = kv_cache_0)[name = string("coreml_update_state_53")]; tensor hidden_states_65_axes_0 = const()[name = string("hidden_states_65_axes_0"), val = tensor([2])]; tensor hidden_states_65_cast_fp16 = expand_dims(axes = hidden_states_65_axes_0, x = K_new_17_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; tensor var_5052 = const()[name = string("op_5052"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_67_cast_fp16 = tile(reps = var_5052, x = hidden_states_65_cast_fp16)[name = string("hidden_states_67_cast_fp16")]; tensor var_5058 = const()[name = string("op_5058"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_17_cast_fp16 = reshape(shape = var_5058, x = hidden_states_67_cast_fp16)[name = string("K_expanded_17_cast_fp16")]; tensor hidden_states_69_axes_0 = const()[name = string("hidden_states_69_axes_0"), val = tensor([2])]; tensor hidden_states_69_cast_fp16 = expand_dims(axes = hidden_states_69_axes_0, x = V_new_17_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; tensor var_5067 = const()[name = string("op_5067"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_71_cast_fp16 = tile(reps = var_5067, x = hidden_states_69_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor var_5073 = const()[name = string("op_5073"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_17_cast_fp16 = reshape(shape = var_5073, x = hidden_states_71_cast_fp16)[name = string("V_expanded_17_cast_fp16")]; bool var_5088_transpose_x_1 = const()[name = string("op_5088_transpose_x_1"), val = bool(false)]; bool var_5088_transpose_y_1 = const()[name = string("op_5088_transpose_y_1"), val = bool(true)]; tensor var_5088_cast_fp16 = matmul(transpose_x = var_5088_transpose_x_1, transpose_y = var_5088_transpose_y_1, x = q_53, y = K_expanded_17_cast_fp16)[name = string("op_5088_cast_fp16")]; fp16 var_5089_to_fp16 = const()[name = string("op_5089_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_49_cast_fp16 = mul(x = var_5088_cast_fp16, y = var_5089_to_fp16)[name = string("attn_weights_49_cast_fp16")]; tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = causal_mask)[name = string("attn_weights_51_cast_fp16")]; int32 var_5098 = const()[name = string("op_5098"), val = int32(-1)]; tensor var_5100_cast_fp16 = softmax(axis = var_5098, x = attn_weights_51_cast_fp16)[name = string("op_5100_cast_fp16")]; bool var_5116_transpose_x_0 = const()[name = string("op_5116_transpose_x_0"), val = bool(false)]; bool var_5116_transpose_y_0 = const()[name = string("op_5116_transpose_y_0"), val = bool(false)]; tensor var_5116_cast_fp16 = matmul(transpose_x = var_5116_transpose_x_0, transpose_y = var_5116_transpose_y_0, x = var_5100_cast_fp16, y = V_expanded_17_cast_fp16)[name = string("op_5116_cast_fp16")]; tensor var_5126 = const()[name = string("op_5126"), val = tensor([0, 2, 1, 3])]; tensor var_5133 = const()[name = string("op_5133"), val = tensor([1, 32, 1024])]; tensor var_5127 = transpose(perm = var_5126, x = var_5116_cast_fp16)[name = string("transpose_78")]; tensor attn_output_51 = reshape(shape = var_5133, x = var_5127)[name = string("attn_output_51")]; tensor var_5138 = const()[name = string("op_5138"), val = tensor([0, 2, 1])]; tensor squeeze_8_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266546304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267201728))))[name = string("squeeze_8_quantized")]; string var_5154_pad_type_0 = const()[name = string("op_5154_pad_type_0"), val = string("valid")]; int32 var_5154_groups_0 = const()[name = string("op_5154_groups_0"), val = int32(1)]; tensor var_5154_strides_0 = const()[name = string("op_5154_strides_0"), val = tensor([1])]; tensor var_5154_pad_0 = const()[name = string("op_5154_pad_0"), val = tensor([0, 0])]; tensor var_5154_dilations_0 = const()[name = string("op_5154_dilations_0"), val = tensor([1])]; tensor var_5139 = transpose(perm = var_5138, x = attn_output_51)[name = string("transpose_77")]; tensor var_5154 = conv(dilations = var_5154_dilations_0, groups = var_5154_groups_0, pad = var_5154_pad_0, pad_type = var_5154_pad_type_0, strides = var_5154_strides_0, weight = squeeze_8_quantized, x = var_5139)[name = string("op_5154")]; tensor var_5158 = const()[name = string("op_5158"), val = tensor([0, 2, 1])]; int32 var_5165 = const()[name = string("op_5165"), val = int32(-1)]; fp16 const_120_promoted_to_fp16 = const()[name = string("const_120_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_137 = transpose(perm = var_5158, x = var_5154)[name = string("transpose_76")]; tensor var_5171_cast_fp16 = mul(x = x_137, y = const_120_promoted_to_fp16)[name = string("op_5171_cast_fp16")]; bool input_171_interleave_0 = const()[name = string("input_171_interleave_0"), val = bool(false)]; tensor input_171_cast_fp16 = concat(axis = var_5165, interleave = input_171_interleave_0, values = (x_137, var_5171_cast_fp16))[name = string("input_171_cast_fp16")]; tensor normed_239_axes_0 = const()[name = string("normed_239_axes_0"), val = tensor([-1])]; fp16 var_5163_to_fp16 = const()[name = string("op_5163_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_239_cast_fp16 = layer_norm(axes = normed_239_axes_0, epsilon = var_5163_to_fp16, x = input_171_cast_fp16)[name = string("normed_239_cast_fp16")]; tensor var_5176_split_sizes_0 = const()[name = string("op_5176_split_sizes_0"), val = tensor([640, 640])]; int32 var_5176_axis_0 = const()[name = string("op_5176_axis_0"), val = int32(-1)]; tensor var_5176_cast_fp16_0, tensor var_5176_cast_fp16_1 = split(axis = var_5176_axis_0, split_sizes = var_5176_split_sizes_0, x = normed_239_cast_fp16)[name = string("op_5176_cast_fp16")]; tensor var_5180_to_fp16 = const()[name = string("op_5180_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267203072)))]; tensor out_103_cast_fp16 = mul(x = var_5176_cast_fp16_0, y = var_5180_to_fp16)[name = string("out_103_cast_fp16")]; tensor x_139_cast_fp16 = add(x = x_129_cast_fp16, y = out_103_cast_fp16)[name = string("x_139_cast_fp16")]; int32 var_5194 = const()[name = string("op_5194"), val = int32(-1)]; fp16 const_122_promoted_to_fp16 = const()[name = string("const_122_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5200_cast_fp16 = mul(x = x_139_cast_fp16, y = const_122_promoted_to_fp16)[name = string("op_5200_cast_fp16")]; bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; tensor input_173_cast_fp16 = concat(axis = var_5194, interleave = input_173_interleave_0, values = (x_139_cast_fp16, var_5200_cast_fp16))[name = string("input_173_cast_fp16")]; tensor normed_243_axes_0 = const()[name = string("normed_243_axes_0"), val = tensor([-1])]; fp16 var_5192_to_fp16 = const()[name = string("op_5192_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_243_cast_fp16 = layer_norm(axes = normed_243_axes_0, epsilon = var_5192_to_fp16, x = input_173_cast_fp16)[name = string("normed_243_cast_fp16")]; tensor var_5205_split_sizes_0 = const()[name = string("op_5205_split_sizes_0"), val = tensor([640, 640])]; int32 var_5205_axis_0 = const()[name = string("op_5205_axis_0"), val = int32(-1)]; tensor var_5205_cast_fp16_0, tensor var_5205_cast_fp16_1 = split(axis = var_5205_axis_0, split_sizes = var_5205_split_sizes_0, x = normed_243_cast_fp16)[name = string("op_5205_cast_fp16")]; tensor var_5209_to_fp16 = const()[name = string("op_5209_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267204416)))]; tensor out_105_cast_fp16 = mul(x = var_5205_cast_fp16_0, y = var_5209_to_fp16)[name = string("out_105_cast_fp16")]; tensor var_5223 = const()[name = string("op_5223"), val = tensor([0, 2, 1])]; tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; tensor var_5224 = transpose(perm = var_5223, x = out_105_cast_fp16)[name = string("transpose_75")]; tensor input_175 = expand_dims(axes = input_175_axes_0, x = var_5224)[name = string("input_175")]; string gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")]; tensor gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor([1, 1])]; tensor gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor([1, 1])]; int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)]; tensor gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = layers_8_mlp_gate_proj_weight_quantized, x = input_175)[name = string("gate_33")]; string up_17_pad_type_0 = const()[name = string("up_17_pad_type_0"), val = string("valid")]; tensor up_17_strides_0 = const()[name = string("up_17_strides_0"), val = tensor([1, 1])]; tensor up_17_pad_0 = const()[name = string("up_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_17_dilations_0 = const()[name = string("up_17_dilations_0"), val = tensor([1, 1])]; int32 up_17_groups_0 = const()[name = string("up_17_groups_0"), val = int32(1)]; tensor up_17 = conv(dilations = up_17_dilations_0, groups = up_17_groups_0, pad = up_17_pad_0, pad_type = up_17_pad_type_0, strides = up_17_strides_0, weight = layers_8_mlp_up_proj_weight_quantized, x = input_175)[name = string("up_17")]; string gate_35_mode_0 = const()[name = string("gate_35_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_35 = gelu(mode = gate_35_mode_0, x = gate_33)[name = string("gate_35")]; tensor input_177 = mul(x = gate_35, y = up_17)[name = string("input_177")]; string var_5262_pad_type_0 = const()[name = string("op_5262_pad_type_0"), val = string("valid")]; tensor var_5262_strides_0 = const()[name = string("op_5262_strides_0"), val = tensor([1, 1])]; tensor var_5262_pad_0 = const()[name = string("op_5262_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5262_dilations_0 = const()[name = string("op_5262_dilations_0"), val = tensor([1, 1])]; int32 var_5262_groups_0 = const()[name = string("op_5262_groups_0"), val = int32(1)]; tensor var_5262 = conv(dilations = var_5262_dilations_0, groups = var_5262_groups_0, pad = var_5262_pad_0, pad_type = var_5262_pad_type_0, strides = var_5262_strides_0, weight = layers_8_mlp_down_proj_weight_quantized, x = input_177)[name = string("op_5262")]; tensor var_5264_axes_0 = const()[name = string("op_5264_axes_0"), val = tensor([2])]; tensor var_5264 = squeeze(axes = var_5264_axes_0, x = var_5262)[name = string("op_5264")]; tensor var_5268 = const()[name = string("op_5268"), val = tensor([0, 2, 1])]; int32 var_5275 = const()[name = string("op_5275"), val = int32(-1)]; fp16 const_124_promoted_to_fp16 = const()[name = string("const_124_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_143 = transpose(perm = var_5268, x = var_5264)[name = string("transpose_74")]; tensor var_5281_cast_fp16 = mul(x = x_143, y = const_124_promoted_to_fp16)[name = string("op_5281_cast_fp16")]; bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; tensor input_179_cast_fp16 = concat(axis = var_5275, interleave = input_179_interleave_0, values = (x_143, var_5281_cast_fp16))[name = string("input_179_cast_fp16")]; tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; fp16 var_5273_to_fp16 = const()[name = string("op_5273_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_5273_to_fp16, x = input_179_cast_fp16)[name = string("normed_249_cast_fp16")]; tensor var_5286_split_sizes_0 = const()[name = string("op_5286_split_sizes_0"), val = tensor([640, 640])]; int32 var_5286_axis_0 = const()[name = string("op_5286_axis_0"), val = int32(-1)]; tensor var_5286_cast_fp16_0, tensor var_5286_cast_fp16_1 = split(axis = var_5286_axis_0, split_sizes = var_5286_split_sizes_0, x = normed_249_cast_fp16)[name = string("op_5286_cast_fp16")]; tensor var_5290_to_fp16 = const()[name = string("op_5290_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267205760)))]; tensor out_107_cast_fp16 = mul(x = var_5286_cast_fp16_0, y = var_5290_to_fp16)[name = string("out_107_cast_fp16")]; tensor x_145_cast_fp16 = add(x = x_139_cast_fp16, y = out_107_cast_fp16)[name = string("x_145_cast_fp16")]; int32 var_5304 = const()[name = string("op_5304"), val = int32(-1)]; fp16 const_126_promoted_to_fp16 = const()[name = string("const_126_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5310_cast_fp16 = mul(x = x_145_cast_fp16, y = const_126_promoted_to_fp16)[name = string("op_5310_cast_fp16")]; bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; tensor input_181_cast_fp16 = concat(axis = var_5304, interleave = input_181_interleave_0, values = (x_145_cast_fp16, var_5310_cast_fp16))[name = string("input_181_cast_fp16")]; tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; fp16 var_5302_to_fp16 = const()[name = string("op_5302_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_5302_to_fp16, x = input_181_cast_fp16)[name = string("normed_253_cast_fp16")]; tensor var_5315_split_sizes_0 = const()[name = string("op_5315_split_sizes_0"), val = tensor([640, 640])]; int32 var_5315_axis_0 = const()[name = string("op_5315_axis_0"), val = int32(-1)]; tensor var_5315_cast_fp16_0, tensor var_5315_cast_fp16_1 = split(axis = var_5315_axis_0, split_sizes = var_5315_split_sizes_0, x = normed_253_cast_fp16)[name = string("op_5315_cast_fp16")]; tensor var_5319_to_fp16 = const()[name = string("op_5319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267207104)))]; tensor out_109_cast_fp16 = mul(x = var_5315_cast_fp16_0, y = var_5319_to_fp16)[name = string("out_109_cast_fp16")]; tensor var_5333 = const()[name = string("op_5333"), val = tensor([0, 2, 1])]; tensor input_183_axes_0 = const()[name = string("input_183_axes_0"), val = tensor([2])]; tensor var_5334 = transpose(perm = var_5333, x = out_109_cast_fp16)[name = string("transpose_73")]; tensor input_183 = expand_dims(axes = input_183_axes_0, x = var_5334)[name = string("input_183")]; string var_5347_pad_type_0 = const()[name = string("op_5347_pad_type_0"), val = string("valid")]; tensor var_5347_strides_0 = const()[name = string("op_5347_strides_0"), val = tensor([1, 1])]; tensor var_5347_pad_0 = const()[name = string("op_5347_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5347_dilations_0 = const()[name = string("op_5347_dilations_0"), val = tensor([1, 1])]; int32 var_5347_groups_0 = const()[name = string("op_5347_groups_0"), val = int32(1)]; tensor var_5347 = conv(dilations = var_5347_dilations_0, groups = var_5347_groups_0, pad = var_5347_pad_0, pad_type = var_5347_pad_type_0, strides = var_5347_strides_0, weight = layers_9_self_attn_q_proj_weight_quantized, x = input_183)[name = string("op_5347")]; tensor var_5352 = const()[name = string("op_5352"), val = tensor([1, 4, 256, 32])]; tensor var_5353 = reshape(shape = var_5352, x = var_5347)[name = string("op_5353")]; tensor var_5358 = const()[name = string("op_5358"), val = tensor([0, 1, 3, 2])]; int32 var_5371 = const()[name = string("op_5371"), val = int32(-1)]; fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_55 = transpose(perm = var_5358, x = var_5353)[name = string("transpose_72")]; tensor var_5377_cast_fp16 = mul(x = q_55, y = const_128_promoted_to_fp16)[name = string("op_5377_cast_fp16")]; bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; tensor input_185_cast_fp16 = concat(axis = var_5371, interleave = input_185_interleave_0, values = (q_55, var_5377_cast_fp16))[name = string("input_185_cast_fp16")]; tensor normed_259_axes_0 = const()[name = string("normed_259_axes_0"), val = tensor([-1])]; fp16 var_5369_to_fp16 = const()[name = string("op_5369_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_259_cast_fp16 = layer_norm(axes = normed_259_axes_0, epsilon = var_5369_to_fp16, x = input_185_cast_fp16)[name = string("normed_259_cast_fp16")]; tensor var_5382_split_sizes_0 = const()[name = string("op_5382_split_sizes_0"), val = tensor([256, 256])]; int32 var_5382_axis_0 = const()[name = string("op_5382_axis_0"), val = int32(-1)]; tensor var_5382_cast_fp16_0, tensor var_5382_cast_fp16_1 = split(axis = var_5382_axis_0, split_sizes = var_5382_split_sizes_0, x = normed_259_cast_fp16)[name = string("op_5382_cast_fp16")]; tensor var_5386_to_fp16 = const()[name = string("op_5386_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267208448)))]; tensor out_111_cast_fp16 = mul(x = var_5382_cast_fp16_0, y = var_5386_to_fp16)[name = string("out_111_cast_fp16")]; string var_5399_pad_type_0 = const()[name = string("op_5399_pad_type_0"), val = string("valid")]; tensor var_5399_strides_0 = const()[name = string("op_5399_strides_0"), val = tensor([1, 1])]; tensor var_5399_pad_0 = const()[name = string("op_5399_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5399_dilations_0 = const()[name = string("op_5399_dilations_0"), val = tensor([1, 1])]; int32 var_5399_groups_0 = const()[name = string("op_5399_groups_0"), val = int32(1)]; tensor var_5399 = conv(dilations = var_5399_dilations_0, groups = var_5399_groups_0, pad = var_5399_pad_0, pad_type = var_5399_pad_type_0, strides = var_5399_strides_0, weight = layers_9_self_attn_k_proj_weight_quantized, x = input_183)[name = string("op_5399")]; tensor var_5404 = const()[name = string("op_5404"), val = tensor([1, 1, 256, 32])]; tensor var_5405 = reshape(shape = var_5404, x = var_5399)[name = string("op_5405")]; tensor var_5410 = const()[name = string("op_5410"), val = tensor([0, 1, 3, 2])]; int32 var_5423 = const()[name = string("op_5423"), val = int32(-1)]; fp16 const_130_promoted_to_fp16 = const()[name = string("const_130_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_55 = transpose(perm = var_5410, x = var_5405)[name = string("transpose_71")]; tensor var_5429_cast_fp16 = mul(x = k_55, y = const_130_promoted_to_fp16)[name = string("op_5429_cast_fp16")]; bool input_187_interleave_0 = const()[name = string("input_187_interleave_0"), val = bool(false)]; tensor input_187_cast_fp16 = concat(axis = var_5423, interleave = input_187_interleave_0, values = (k_55, var_5429_cast_fp16))[name = string("input_187_cast_fp16")]; tensor normed_263_axes_0 = const()[name = string("normed_263_axes_0"), val = tensor([-1])]; fp16 var_5421_to_fp16 = const()[name = string("op_5421_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_263_cast_fp16 = layer_norm(axes = normed_263_axes_0, epsilon = var_5421_to_fp16, x = input_187_cast_fp16)[name = string("normed_263_cast_fp16")]; tensor var_5434_split_sizes_0 = const()[name = string("op_5434_split_sizes_0"), val = tensor([256, 256])]; int32 var_5434_axis_0 = const()[name = string("op_5434_axis_0"), val = int32(-1)]; tensor var_5434_cast_fp16_0, tensor var_5434_cast_fp16_1 = split(axis = var_5434_axis_0, split_sizes = var_5434_split_sizes_0, x = normed_263_cast_fp16)[name = string("op_5434_cast_fp16")]; tensor var_5438_to_fp16 = const()[name = string("op_5438_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267209024)))]; tensor out_113_cast_fp16 = mul(x = var_5434_cast_fp16_0, y = var_5438_to_fp16)[name = string("out_113_cast_fp16")]; string var_5451_pad_type_0 = const()[name = string("op_5451_pad_type_0"), val = string("valid")]; tensor var_5451_strides_0 = const()[name = string("op_5451_strides_0"), val = tensor([1, 1])]; tensor var_5451_pad_0 = const()[name = string("op_5451_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5451_dilations_0 = const()[name = string("op_5451_dilations_0"), val = tensor([1, 1])]; int32 var_5451_groups_0 = const()[name = string("op_5451_groups_0"), val = int32(1)]; tensor var_5451 = conv(dilations = var_5451_dilations_0, groups = var_5451_groups_0, pad = var_5451_pad_0, pad_type = var_5451_pad_type_0, strides = var_5451_strides_0, weight = layers_9_self_attn_v_proj_weight_quantized, x = input_183)[name = string("op_5451")]; tensor var_5456 = const()[name = string("op_5456"), val = tensor([1, 1, 256, 32])]; tensor var_5457 = reshape(shape = var_5456, x = var_5451)[name = string("op_5457")]; tensor var_5464 = mul(x = out_111_cast_fp16, y = cos_1)[name = string("op_5464")]; tensor var_5465_split_sizes_0 = const()[name = string("op_5465_split_sizes_0"), val = tensor([128, 128])]; int32 var_5465_axis_0 = const()[name = string("op_5465_axis_0"), val = int32(-1)]; tensor var_5465_0, tensor var_5465_1 = split(axis = var_5465_axis_0, split_sizes = var_5465_split_sizes_0, x = out_111_cast_fp16)[name = string("op_5465")]; fp16 const_132_promoted = const()[name = string("const_132_promoted"), val = fp16(-0x1p+0)]; tensor var_5467 = mul(x = var_5465_1, y = const_132_promoted)[name = string("op_5467")]; int32 var_5469 = const()[name = string("op_5469"), val = int32(-1)]; bool var_5470_interleave_0 = const()[name = string("op_5470_interleave_0"), val = bool(false)]; tensor var_5470 = concat(axis = var_5469, interleave = var_5470_interleave_0, values = (var_5467, var_5465_0))[name = string("op_5470")]; tensor var_5471 = mul(x = var_5470, y = sin_1)[name = string("op_5471")]; tensor q_59 = add(x = var_5464, y = var_5471)[name = string("q_59")]; tensor var_5474 = mul(x = out_113_cast_fp16, y = cos_1)[name = string("op_5474")]; tensor var_5475_split_sizes_0 = const()[name = string("op_5475_split_sizes_0"), val = tensor([128, 128])]; int32 var_5475_axis_0 = const()[name = string("op_5475_axis_0"), val = int32(-1)]; tensor var_5475_0, tensor var_5475_1 = split(axis = var_5475_axis_0, split_sizes = var_5475_split_sizes_0, x = out_113_cast_fp16)[name = string("op_5475")]; fp16 const_133_promoted = const()[name = string("const_133_promoted"), val = fp16(-0x1p+0)]; tensor var_5477 = mul(x = var_5475_1, y = const_133_promoted)[name = string("op_5477")]; int32 var_5479 = const()[name = string("op_5479"), val = int32(-1)]; bool var_5480_interleave_0 = const()[name = string("op_5480_interleave_0"), val = bool(false)]; tensor var_5480 = concat(axis = var_5479, interleave = var_5480_interleave_0, values = (var_5477, var_5475_0))[name = string("op_5480")]; tensor var_5481 = mul(x = var_5480, y = sin_1)[name = string("op_5481")]; tensor k_59 = add(x = var_5474, y = var_5481)[name = string("k_59")]; tensor var_5486_begin_0 = const()[name = string("op_5486_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_5486_end_0 = const()[name = string("op_5486_end_0"), val = tensor([10, 1, 2048, 256])]; tensor var_5486_end_mask_0 = const()[name = string("op_5486_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5486_squeeze_mask_0 = const()[name = string("op_5486_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_5486_cast_fp16 = slice_by_index(begin = var_5486_begin_0, end = var_5486_end_0, end_mask = var_5486_end_mask_0, squeeze_mask = var_5486_squeeze_mask_0, x = coreml_update_state_53)[name = string("op_5486_cast_fp16")]; tensor K_cache_19_axes_0 = const()[name = string("K_cache_19_axes_0"), val = tensor([0])]; tensor K_cache_19_cast_fp16 = expand_dims(axes = K_cache_19_axes_0, x = var_5486_cast_fp16)[name = string("K_cache_19_cast_fp16")]; tensor var_5491_begin_0 = const()[name = string("op_5491_begin_0"), val = tensor([27, 0, 0, 0])]; tensor var_5491_end_0 = const()[name = string("op_5491_end_0"), val = tensor([28, 1, 2048, 256])]; tensor var_5491_end_mask_0 = const()[name = string("op_5491_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5491_squeeze_mask_0 = const()[name = string("op_5491_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_5491_cast_fp16 = slice_by_index(begin = var_5491_begin_0, end = var_5491_end_0, end_mask = var_5491_end_mask_0, squeeze_mask = var_5491_squeeze_mask_0, x = coreml_update_state_53)[name = string("op_5491_cast_fp16")]; tensor V_cache_19_axes_0 = const()[name = string("V_cache_19_axes_0"), val = tensor([0])]; tensor V_cache_19_cast_fp16 = expand_dims(axes = V_cache_19_axes_0, x = var_5491_cast_fp16)[name = string("V_cache_19_cast_fp16")]; bool k_increment_19_transpose_x_0 = const()[name = string("k_increment_19_transpose_x_0"), val = bool(false)]; bool k_increment_19_transpose_y_0 = const()[name = string("k_increment_19_transpose_y_0"), val = bool(false)]; tensor k_increment_19 = matmul(transpose_x = k_increment_19_transpose_x_0, transpose_y = k_increment_19_transpose_y_0, x = update_mask, y = k_59)[name = string("k_increment_19")]; bool v_increment_19_transpose_x_1 = const()[name = string("v_increment_19_transpose_x_1"), val = bool(false)]; bool v_increment_19_transpose_y_1 = const()[name = string("v_increment_19_transpose_y_1"), val = bool(true)]; tensor v_increment_19 = matmul(transpose_x = v_increment_19_transpose_x_1, transpose_y = v_increment_19_transpose_y_1, x = update_mask, y = var_5457)[name = string("v_increment_19")]; tensor var_5509_cast_fp16 = mul(x = K_cache_19_cast_fp16, y = var_1125_cast_fp16)[name = string("op_5509_cast_fp16")]; tensor K_new_19_cast_fp16 = add(x = var_5509_cast_fp16, y = k_increment_19)[name = string("K_new_19_cast_fp16")]; tensor var_5515_cast_fp16 = mul(x = V_cache_19_cast_fp16, y = var_1125_cast_fp16)[name = string("op_5515_cast_fp16")]; tensor V_new_19_cast_fp16 = add(x = var_5515_cast_fp16, y = v_increment_19)[name = string("V_new_19_cast_fp16")]; tensor var_5519_axes_0 = const()[name = string("op_5519_axes_0"), val = tensor([0])]; tensor var_5519_cast_fp16 = squeeze(axes = var_5519_axes_0, x = K_new_19_cast_fp16)[name = string("op_5519_cast_fp16")]; tensor concat_36 = const()[name = string("concat_36"), val = tensor([9, 0, 0, 0])]; tensor concat_37 = const()[name = string("concat_37"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_19_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_19_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_19_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_36, begin_mask = kv_cache_0_internal_tensor_assign_19_begin_mask_0, end = concat_37, end_mask = kv_cache_0_internal_tensor_assign_19_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_19_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_19_stride_0, update = var_5519_cast_fp16, x = coreml_update_state_53)[name = string("kv_cache_0_internal_tensor_assign_19_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_19_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_54_write_state")]; tensor coreml_update_state_54 = read_state(input = kv_cache_0)[name = string("coreml_update_state_54")]; tensor var_5526_axes_0 = const()[name = string("op_5526_axes_0"), val = tensor([0])]; tensor var_5526_cast_fp16 = squeeze(axes = var_5526_axes_0, x = V_new_19_cast_fp16)[name = string("op_5526_cast_fp16")]; tensor concat_38 = const()[name = string("concat_38"), val = tensor([27, 0, 0, 0])]; tensor concat_39 = const()[name = string("concat_39"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_20_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_20_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_20_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_38, begin_mask = kv_cache_0_internal_tensor_assign_20_begin_mask_0, end = concat_39, end_mask = kv_cache_0_internal_tensor_assign_20_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_20_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_20_stride_0, update = var_5526_cast_fp16, x = coreml_update_state_54)[name = string("kv_cache_0_internal_tensor_assign_20_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_20_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_55_write_state")]; tensor coreml_update_state_55 = read_state(input = kv_cache_0)[name = string("coreml_update_state_55")]; tensor hidden_states_73_axes_0 = const()[name = string("hidden_states_73_axes_0"), val = tensor([2])]; tensor hidden_states_73_cast_fp16 = expand_dims(axes = hidden_states_73_axes_0, x = K_new_19_cast_fp16)[name = string("hidden_states_73_cast_fp16")]; tensor var_5539 = const()[name = string("op_5539"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_75_cast_fp16 = tile(reps = var_5539, x = hidden_states_73_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; tensor var_5545 = const()[name = string("op_5545"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_19_cast_fp16 = reshape(shape = var_5545, x = hidden_states_75_cast_fp16)[name = string("K_expanded_19_cast_fp16")]; tensor hidden_states_77_axes_0 = const()[name = string("hidden_states_77_axes_0"), val = tensor([2])]; tensor hidden_states_77_cast_fp16 = expand_dims(axes = hidden_states_77_axes_0, x = V_new_19_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor var_5554 = const()[name = string("op_5554"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_79_cast_fp16 = tile(reps = var_5554, x = hidden_states_77_cast_fp16)[name = string("hidden_states_79_cast_fp16")]; tensor var_5560 = const()[name = string("op_5560"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_19_cast_fp16 = reshape(shape = var_5560, x = hidden_states_79_cast_fp16)[name = string("V_expanded_19_cast_fp16")]; bool var_5575_transpose_x_1 = const()[name = string("op_5575_transpose_x_1"), val = bool(false)]; bool var_5575_transpose_y_1 = const()[name = string("op_5575_transpose_y_1"), val = bool(true)]; tensor var_5575_cast_fp16 = matmul(transpose_x = var_5575_transpose_x_1, transpose_y = var_5575_transpose_y_1, x = q_59, y = K_expanded_19_cast_fp16)[name = string("op_5575_cast_fp16")]; fp16 var_5576_to_fp16 = const()[name = string("op_5576_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_55_cast_fp16 = mul(x = var_5575_cast_fp16, y = var_5576_to_fp16)[name = string("attn_weights_55_cast_fp16")]; tensor attn_weights_57_cast_fp16 = add(x = attn_weights_55_cast_fp16, y = causal_mask)[name = string("attn_weights_57_cast_fp16")]; int32 var_5585 = const()[name = string("op_5585"), val = int32(-1)]; tensor var_5587_cast_fp16 = softmax(axis = var_5585, x = attn_weights_57_cast_fp16)[name = string("op_5587_cast_fp16")]; bool var_5603_transpose_x_0 = const()[name = string("op_5603_transpose_x_0"), val = bool(false)]; bool var_5603_transpose_y_0 = const()[name = string("op_5603_transpose_y_0"), val = bool(false)]; tensor var_5603_cast_fp16 = matmul(transpose_x = var_5603_transpose_x_0, transpose_y = var_5603_transpose_y_0, x = var_5587_cast_fp16, y = V_expanded_19_cast_fp16)[name = string("op_5603_cast_fp16")]; tensor var_5613 = const()[name = string("op_5613"), val = tensor([0, 2, 1, 3])]; tensor var_5620 = const()[name = string("op_5620"), val = tensor([1, 32, 1024])]; tensor var_5614 = transpose(perm = var_5613, x = var_5603_cast_fp16)[name = string("transpose_70")]; tensor attn_output_57 = reshape(shape = var_5620, x = var_5614)[name = string("attn_output_57")]; tensor var_5625 = const()[name = string("op_5625"), val = tensor([0, 2, 1])]; tensor squeeze_9_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267209600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267865024))))[name = string("squeeze_9_quantized")]; string var_5641_pad_type_0 = const()[name = string("op_5641_pad_type_0"), val = string("valid")]; int32 var_5641_groups_0 = const()[name = string("op_5641_groups_0"), val = int32(1)]; tensor var_5641_strides_0 = const()[name = string("op_5641_strides_0"), val = tensor([1])]; tensor var_5641_pad_0 = const()[name = string("op_5641_pad_0"), val = tensor([0, 0])]; tensor var_5641_dilations_0 = const()[name = string("op_5641_dilations_0"), val = tensor([1])]; tensor var_5626 = transpose(perm = var_5625, x = attn_output_57)[name = string("transpose_69")]; tensor var_5641 = conv(dilations = var_5641_dilations_0, groups = var_5641_groups_0, pad = var_5641_pad_0, pad_type = var_5641_pad_type_0, strides = var_5641_strides_0, weight = squeeze_9_quantized, x = var_5626)[name = string("op_5641")]; tensor var_5645 = const()[name = string("op_5645"), val = tensor([0, 2, 1])]; int32 var_5652 = const()[name = string("op_5652"), val = int32(-1)]; fp16 const_134_promoted_to_fp16 = const()[name = string("const_134_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_153 = transpose(perm = var_5645, x = var_5641)[name = string("transpose_68")]; tensor var_5658_cast_fp16 = mul(x = x_153, y = const_134_promoted_to_fp16)[name = string("op_5658_cast_fp16")]; bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; tensor input_191_cast_fp16 = concat(axis = var_5652, interleave = input_191_interleave_0, values = (x_153, var_5658_cast_fp16))[name = string("input_191_cast_fp16")]; tensor normed_267_axes_0 = const()[name = string("normed_267_axes_0"), val = tensor([-1])]; fp16 var_5650_to_fp16 = const()[name = string("op_5650_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_267_cast_fp16 = layer_norm(axes = normed_267_axes_0, epsilon = var_5650_to_fp16, x = input_191_cast_fp16)[name = string("normed_267_cast_fp16")]; tensor var_5663_split_sizes_0 = const()[name = string("op_5663_split_sizes_0"), val = tensor([640, 640])]; int32 var_5663_axis_0 = const()[name = string("op_5663_axis_0"), val = int32(-1)]; tensor var_5663_cast_fp16_0, tensor var_5663_cast_fp16_1 = split(axis = var_5663_axis_0, split_sizes = var_5663_split_sizes_0, x = normed_267_cast_fp16)[name = string("op_5663_cast_fp16")]; tensor var_5667_to_fp16 = const()[name = string("op_5667_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267866368)))]; tensor out_115_cast_fp16 = mul(x = var_5663_cast_fp16_0, y = var_5667_to_fp16)[name = string("out_115_cast_fp16")]; tensor x_155_cast_fp16 = add(x = x_145_cast_fp16, y = out_115_cast_fp16)[name = string("x_155_cast_fp16")]; int32 var_5681 = const()[name = string("op_5681"), val = int32(-1)]; fp16 const_136_promoted_to_fp16 = const()[name = string("const_136_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5687_cast_fp16 = mul(x = x_155_cast_fp16, y = const_136_promoted_to_fp16)[name = string("op_5687_cast_fp16")]; bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; tensor input_193_cast_fp16 = concat(axis = var_5681, interleave = input_193_interleave_0, values = (x_155_cast_fp16, var_5687_cast_fp16))[name = string("input_193_cast_fp16")]; tensor normed_271_axes_0 = const()[name = string("normed_271_axes_0"), val = tensor([-1])]; fp16 var_5679_to_fp16 = const()[name = string("op_5679_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_271_cast_fp16 = layer_norm(axes = normed_271_axes_0, epsilon = var_5679_to_fp16, x = input_193_cast_fp16)[name = string("normed_271_cast_fp16")]; tensor var_5692_split_sizes_0 = const()[name = string("op_5692_split_sizes_0"), val = tensor([640, 640])]; int32 var_5692_axis_0 = const()[name = string("op_5692_axis_0"), val = int32(-1)]; tensor var_5692_cast_fp16_0, tensor var_5692_cast_fp16_1 = split(axis = var_5692_axis_0, split_sizes = var_5692_split_sizes_0, x = normed_271_cast_fp16)[name = string("op_5692_cast_fp16")]; tensor var_5696_to_fp16 = const()[name = string("op_5696_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267867712)))]; tensor out_117_cast_fp16 = mul(x = var_5692_cast_fp16_0, y = var_5696_to_fp16)[name = string("out_117_cast_fp16")]; tensor var_5710 = const()[name = string("op_5710"), val = tensor([0, 2, 1])]; tensor input_195_axes_0 = const()[name = string("input_195_axes_0"), val = tensor([2])]; tensor var_5711 = transpose(perm = var_5710, x = out_117_cast_fp16)[name = string("transpose_67")]; tensor input_195 = expand_dims(axes = input_195_axes_0, x = var_5711)[name = string("input_195")]; string gate_37_pad_type_0 = const()[name = string("gate_37_pad_type_0"), val = string("valid")]; tensor gate_37_strides_0 = const()[name = string("gate_37_strides_0"), val = tensor([1, 1])]; tensor gate_37_pad_0 = const()[name = string("gate_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_37_dilations_0 = const()[name = string("gate_37_dilations_0"), val = tensor([1, 1])]; int32 gate_37_groups_0 = const()[name = string("gate_37_groups_0"), val = int32(1)]; tensor gate_37 = conv(dilations = gate_37_dilations_0, groups = gate_37_groups_0, pad = gate_37_pad_0, pad_type = gate_37_pad_type_0, strides = gate_37_strides_0, weight = layers_9_mlp_gate_proj_weight_quantized, x = input_195)[name = string("gate_37")]; string up_19_pad_type_0 = const()[name = string("up_19_pad_type_0"), val = string("valid")]; tensor up_19_strides_0 = const()[name = string("up_19_strides_0"), val = tensor([1, 1])]; tensor up_19_pad_0 = const()[name = string("up_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_19_dilations_0 = const()[name = string("up_19_dilations_0"), val = tensor([1, 1])]; int32 up_19_groups_0 = const()[name = string("up_19_groups_0"), val = int32(1)]; tensor up_19 = conv(dilations = up_19_dilations_0, groups = up_19_groups_0, pad = up_19_pad_0, pad_type = up_19_pad_type_0, strides = up_19_strides_0, weight = layers_9_mlp_up_proj_weight_quantized, x = input_195)[name = string("up_19")]; string gate_39_mode_0 = const()[name = string("gate_39_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_39 = gelu(mode = gate_39_mode_0, x = gate_37)[name = string("gate_39")]; tensor input_197 = mul(x = gate_39, y = up_19)[name = string("input_197")]; string var_5749_pad_type_0 = const()[name = string("op_5749_pad_type_0"), val = string("valid")]; tensor var_5749_strides_0 = const()[name = string("op_5749_strides_0"), val = tensor([1, 1])]; tensor var_5749_pad_0 = const()[name = string("op_5749_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5749_dilations_0 = const()[name = string("op_5749_dilations_0"), val = tensor([1, 1])]; int32 var_5749_groups_0 = const()[name = string("op_5749_groups_0"), val = int32(1)]; tensor var_5749 = conv(dilations = var_5749_dilations_0, groups = var_5749_groups_0, pad = var_5749_pad_0, pad_type = var_5749_pad_type_0, strides = var_5749_strides_0, weight = layers_9_mlp_down_proj_weight_quantized, x = input_197)[name = string("op_5749")]; tensor var_5751_axes_0 = const()[name = string("op_5751_axes_0"), val = tensor([2])]; tensor var_5751 = squeeze(axes = var_5751_axes_0, x = var_5749)[name = string("op_5751")]; tensor var_5755 = const()[name = string("op_5755"), val = tensor([0, 2, 1])]; int32 var_5762 = const()[name = string("op_5762"), val = int32(-1)]; fp16 const_138_promoted_to_fp16 = const()[name = string("const_138_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_159 = transpose(perm = var_5755, x = var_5751)[name = string("transpose_66")]; tensor var_5768_cast_fp16 = mul(x = x_159, y = const_138_promoted_to_fp16)[name = string("op_5768_cast_fp16")]; bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; tensor input_199_cast_fp16 = concat(axis = var_5762, interleave = input_199_interleave_0, values = (x_159, var_5768_cast_fp16))[name = string("input_199_cast_fp16")]; tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; fp16 var_5760_to_fp16 = const()[name = string("op_5760_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_5760_to_fp16, x = input_199_cast_fp16)[name = string("normed_277_cast_fp16")]; tensor var_5773_split_sizes_0 = const()[name = string("op_5773_split_sizes_0"), val = tensor([640, 640])]; int32 var_5773_axis_0 = const()[name = string("op_5773_axis_0"), val = int32(-1)]; tensor var_5773_cast_fp16_0, tensor var_5773_cast_fp16_1 = split(axis = var_5773_axis_0, split_sizes = var_5773_split_sizes_0, x = normed_277_cast_fp16)[name = string("op_5773_cast_fp16")]; tensor var_5777_to_fp16 = const()[name = string("op_5777_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267869056)))]; tensor out_119_cast_fp16 = mul(x = var_5773_cast_fp16_0, y = var_5777_to_fp16)[name = string("out_119_cast_fp16")]; tensor x_161_cast_fp16 = add(x = x_155_cast_fp16, y = out_119_cast_fp16)[name = string("x_161_cast_fp16")]; int32 var_5791 = const()[name = string("op_5791"), val = int32(-1)]; fp16 const_140_promoted_to_fp16 = const()[name = string("const_140_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5797_cast_fp16 = mul(x = x_161_cast_fp16, y = const_140_promoted_to_fp16)[name = string("op_5797_cast_fp16")]; bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; tensor input_201_cast_fp16 = concat(axis = var_5791, interleave = input_201_interleave_0, values = (x_161_cast_fp16, var_5797_cast_fp16))[name = string("input_201_cast_fp16")]; tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; fp16 var_5789_to_fp16 = const()[name = string("op_5789_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_5789_to_fp16, x = input_201_cast_fp16)[name = string("normed_281_cast_fp16")]; tensor var_5802_split_sizes_0 = const()[name = string("op_5802_split_sizes_0"), val = tensor([640, 640])]; int32 var_5802_axis_0 = const()[name = string("op_5802_axis_0"), val = int32(-1)]; tensor var_5802_cast_fp16_0, tensor var_5802_cast_fp16_1 = split(axis = var_5802_axis_0, split_sizes = var_5802_split_sizes_0, x = normed_281_cast_fp16)[name = string("op_5802_cast_fp16")]; tensor var_5806_to_fp16 = const()[name = string("op_5806_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267870400)))]; tensor out_121_cast_fp16 = mul(x = var_5802_cast_fp16_0, y = var_5806_to_fp16)[name = string("out_121_cast_fp16")]; tensor var_5820 = const()[name = string("op_5820"), val = tensor([0, 2, 1])]; tensor input_203_axes_0 = const()[name = string("input_203_axes_0"), val = tensor([2])]; tensor var_5821 = transpose(perm = var_5820, x = out_121_cast_fp16)[name = string("transpose_65")]; tensor input_203 = expand_dims(axes = input_203_axes_0, x = var_5821)[name = string("input_203")]; string var_5834_pad_type_0 = const()[name = string("op_5834_pad_type_0"), val = string("valid")]; tensor var_5834_strides_0 = const()[name = string("op_5834_strides_0"), val = tensor([1, 1])]; tensor var_5834_pad_0 = const()[name = string("op_5834_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5834_dilations_0 = const()[name = string("op_5834_dilations_0"), val = tensor([1, 1])]; int32 var_5834_groups_0 = const()[name = string("op_5834_groups_0"), val = int32(1)]; tensor var_5834 = conv(dilations = var_5834_dilations_0, groups = var_5834_groups_0, pad = var_5834_pad_0, pad_type = var_5834_pad_type_0, strides = var_5834_strides_0, weight = layers_10_self_attn_q_proj_weight_quantized, x = input_203)[name = string("op_5834")]; tensor var_5839 = const()[name = string("op_5839"), val = tensor([1, 4, 256, 32])]; tensor var_5840 = reshape(shape = var_5839, x = var_5834)[name = string("op_5840")]; tensor var_5845 = const()[name = string("op_5845"), val = tensor([0, 1, 3, 2])]; int32 var_5858 = const()[name = string("op_5858"), val = int32(-1)]; fp16 const_142_promoted_to_fp16 = const()[name = string("const_142_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_61 = transpose(perm = var_5845, x = var_5840)[name = string("transpose_64")]; tensor var_5864_cast_fp16 = mul(x = q_61, y = const_142_promoted_to_fp16)[name = string("op_5864_cast_fp16")]; bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; tensor input_205_cast_fp16 = concat(axis = var_5858, interleave = input_205_interleave_0, values = (q_61, var_5864_cast_fp16))[name = string("input_205_cast_fp16")]; tensor normed_287_axes_0 = const()[name = string("normed_287_axes_0"), val = tensor([-1])]; fp16 var_5856_to_fp16 = const()[name = string("op_5856_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_287_cast_fp16 = layer_norm(axes = normed_287_axes_0, epsilon = var_5856_to_fp16, x = input_205_cast_fp16)[name = string("normed_287_cast_fp16")]; tensor var_5869_split_sizes_0 = const()[name = string("op_5869_split_sizes_0"), val = tensor([256, 256])]; int32 var_5869_axis_0 = const()[name = string("op_5869_axis_0"), val = int32(-1)]; tensor var_5869_cast_fp16_0, tensor var_5869_cast_fp16_1 = split(axis = var_5869_axis_0, split_sizes = var_5869_split_sizes_0, x = normed_287_cast_fp16)[name = string("op_5869_cast_fp16")]; tensor var_5873_to_fp16 = const()[name = string("op_5873_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267871744)))]; tensor out_123_cast_fp16 = mul(x = var_5869_cast_fp16_0, y = var_5873_to_fp16)[name = string("out_123_cast_fp16")]; string var_5886_pad_type_0 = const()[name = string("op_5886_pad_type_0"), val = string("valid")]; tensor var_5886_strides_0 = const()[name = string("op_5886_strides_0"), val = tensor([1, 1])]; tensor var_5886_pad_0 = const()[name = string("op_5886_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5886_dilations_0 = const()[name = string("op_5886_dilations_0"), val = tensor([1, 1])]; int32 var_5886_groups_0 = const()[name = string("op_5886_groups_0"), val = int32(1)]; tensor var_5886 = conv(dilations = var_5886_dilations_0, groups = var_5886_groups_0, pad = var_5886_pad_0, pad_type = var_5886_pad_type_0, strides = var_5886_strides_0, weight = layers_10_self_attn_k_proj_weight_quantized, x = input_203)[name = string("op_5886")]; tensor var_5891 = const()[name = string("op_5891"), val = tensor([1, 1, 256, 32])]; tensor var_5892 = reshape(shape = var_5891, x = var_5886)[name = string("op_5892")]; tensor var_5897 = const()[name = string("op_5897"), val = tensor([0, 1, 3, 2])]; int32 var_5910 = const()[name = string("op_5910"), val = int32(-1)]; fp16 const_144_promoted_to_fp16 = const()[name = string("const_144_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_61 = transpose(perm = var_5897, x = var_5892)[name = string("transpose_63")]; tensor var_5916_cast_fp16 = mul(x = k_61, y = const_144_promoted_to_fp16)[name = string("op_5916_cast_fp16")]; bool input_207_interleave_0 = const()[name = string("input_207_interleave_0"), val = bool(false)]; tensor input_207_cast_fp16 = concat(axis = var_5910, interleave = input_207_interleave_0, values = (k_61, var_5916_cast_fp16))[name = string("input_207_cast_fp16")]; tensor normed_291_axes_0 = const()[name = string("normed_291_axes_0"), val = tensor([-1])]; fp16 var_5908_to_fp16 = const()[name = string("op_5908_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_291_cast_fp16 = layer_norm(axes = normed_291_axes_0, epsilon = var_5908_to_fp16, x = input_207_cast_fp16)[name = string("normed_291_cast_fp16")]; tensor var_5921_split_sizes_0 = const()[name = string("op_5921_split_sizes_0"), val = tensor([256, 256])]; int32 var_5921_axis_0 = const()[name = string("op_5921_axis_0"), val = int32(-1)]; tensor var_5921_cast_fp16_0, tensor var_5921_cast_fp16_1 = split(axis = var_5921_axis_0, split_sizes = var_5921_split_sizes_0, x = normed_291_cast_fp16)[name = string("op_5921_cast_fp16")]; tensor var_5925_to_fp16 = const()[name = string("op_5925_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267872320)))]; tensor out_125_cast_fp16 = mul(x = var_5921_cast_fp16_0, y = var_5925_to_fp16)[name = string("out_125_cast_fp16")]; string var_5938_pad_type_0 = const()[name = string("op_5938_pad_type_0"), val = string("valid")]; tensor var_5938_strides_0 = const()[name = string("op_5938_strides_0"), val = tensor([1, 1])]; tensor var_5938_pad_0 = const()[name = string("op_5938_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5938_dilations_0 = const()[name = string("op_5938_dilations_0"), val = tensor([1, 1])]; int32 var_5938_groups_0 = const()[name = string("op_5938_groups_0"), val = int32(1)]; tensor var_5938 = conv(dilations = var_5938_dilations_0, groups = var_5938_groups_0, pad = var_5938_pad_0, pad_type = var_5938_pad_type_0, strides = var_5938_strides_0, weight = layers_10_self_attn_v_proj_weight_quantized, x = input_203)[name = string("op_5938")]; tensor var_5943 = const()[name = string("op_5943"), val = tensor([1, 1, 256, 32])]; tensor var_5944 = reshape(shape = var_5943, x = var_5938)[name = string("op_5944")]; tensor var_5951 = mul(x = out_123_cast_fp16, y = cos_1)[name = string("op_5951")]; tensor var_5952_split_sizes_0 = const()[name = string("op_5952_split_sizes_0"), val = tensor([128, 128])]; int32 var_5952_axis_0 = const()[name = string("op_5952_axis_0"), val = int32(-1)]; tensor var_5952_0, tensor var_5952_1 = split(axis = var_5952_axis_0, split_sizes = var_5952_split_sizes_0, x = out_123_cast_fp16)[name = string("op_5952")]; fp16 const_146_promoted = const()[name = string("const_146_promoted"), val = fp16(-0x1p+0)]; tensor var_5954 = mul(x = var_5952_1, y = const_146_promoted)[name = string("op_5954")]; int32 var_5956 = const()[name = string("op_5956"), val = int32(-1)]; bool var_5957_interleave_0 = const()[name = string("op_5957_interleave_0"), val = bool(false)]; tensor var_5957 = concat(axis = var_5956, interleave = var_5957_interleave_0, values = (var_5954, var_5952_0))[name = string("op_5957")]; tensor var_5958 = mul(x = var_5957, y = sin_1)[name = string("op_5958")]; tensor q_65 = add(x = var_5951, y = var_5958)[name = string("q_65")]; tensor var_5961 = mul(x = out_125_cast_fp16, y = cos_1)[name = string("op_5961")]; tensor var_5962_split_sizes_0 = const()[name = string("op_5962_split_sizes_0"), val = tensor([128, 128])]; int32 var_5962_axis_0 = const()[name = string("op_5962_axis_0"), val = int32(-1)]; tensor var_5962_0, tensor var_5962_1 = split(axis = var_5962_axis_0, split_sizes = var_5962_split_sizes_0, x = out_125_cast_fp16)[name = string("op_5962")]; fp16 const_147_promoted = const()[name = string("const_147_promoted"), val = fp16(-0x1p+0)]; tensor var_5964 = mul(x = var_5962_1, y = const_147_promoted)[name = string("op_5964")]; int32 var_5966 = const()[name = string("op_5966"), val = int32(-1)]; bool var_5967_interleave_0 = const()[name = string("op_5967_interleave_0"), val = bool(false)]; tensor var_5967 = concat(axis = var_5966, interleave = var_5967_interleave_0, values = (var_5964, var_5962_0))[name = string("op_5967")]; tensor var_5968 = mul(x = var_5967, y = sin_1)[name = string("op_5968")]; tensor k_65 = add(x = var_5961, y = var_5968)[name = string("k_65")]; tensor var_5973_begin_0 = const()[name = string("op_5973_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_5973_end_0 = const()[name = string("op_5973_end_0"), val = tensor([11, 1, 2048, 256])]; tensor var_5973_end_mask_0 = const()[name = string("op_5973_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5973_squeeze_mask_0 = const()[name = string("op_5973_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_5973_cast_fp16 = slice_by_index(begin = var_5973_begin_0, end = var_5973_end_0, end_mask = var_5973_end_mask_0, squeeze_mask = var_5973_squeeze_mask_0, x = coreml_update_state_55)[name = string("op_5973_cast_fp16")]; tensor K_cache_21_axes_0 = const()[name = string("K_cache_21_axes_0"), val = tensor([0])]; tensor K_cache_21_cast_fp16 = expand_dims(axes = K_cache_21_axes_0, x = var_5973_cast_fp16)[name = string("K_cache_21_cast_fp16")]; tensor var_5978_begin_0 = const()[name = string("op_5978_begin_0"), val = tensor([28, 0, 0, 0])]; tensor var_5978_end_0 = const()[name = string("op_5978_end_0"), val = tensor([29, 1, 2048, 256])]; tensor var_5978_end_mask_0 = const()[name = string("op_5978_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5978_squeeze_mask_0 = const()[name = string("op_5978_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_5978_cast_fp16 = slice_by_index(begin = var_5978_begin_0, end = var_5978_end_0, end_mask = var_5978_end_mask_0, squeeze_mask = var_5978_squeeze_mask_0, x = coreml_update_state_55)[name = string("op_5978_cast_fp16")]; tensor V_cache_21_axes_0 = const()[name = string("V_cache_21_axes_0"), val = tensor([0])]; tensor V_cache_21_cast_fp16 = expand_dims(axes = V_cache_21_axes_0, x = var_5978_cast_fp16)[name = string("V_cache_21_cast_fp16")]; bool k_increment_21_transpose_x_0 = const()[name = string("k_increment_21_transpose_x_0"), val = bool(false)]; bool k_increment_21_transpose_y_0 = const()[name = string("k_increment_21_transpose_y_0"), val = bool(false)]; tensor k_increment_21 = matmul(transpose_x = k_increment_21_transpose_x_0, transpose_y = k_increment_21_transpose_y_0, x = update_mask, y = k_65)[name = string("k_increment_21")]; bool v_increment_21_transpose_x_1 = const()[name = string("v_increment_21_transpose_x_1"), val = bool(false)]; bool v_increment_21_transpose_y_1 = const()[name = string("v_increment_21_transpose_y_1"), val = bool(true)]; tensor v_increment_21 = matmul(transpose_x = v_increment_21_transpose_x_1, transpose_y = v_increment_21_transpose_y_1, x = update_mask, y = var_5944)[name = string("v_increment_21")]; tensor var_5996_cast_fp16 = mul(x = K_cache_21_cast_fp16, y = var_1125_cast_fp16)[name = string("op_5996_cast_fp16")]; tensor K_new_21_cast_fp16 = add(x = var_5996_cast_fp16, y = k_increment_21)[name = string("K_new_21_cast_fp16")]; tensor var_6002_cast_fp16 = mul(x = V_cache_21_cast_fp16, y = var_1125_cast_fp16)[name = string("op_6002_cast_fp16")]; tensor V_new_21_cast_fp16 = add(x = var_6002_cast_fp16, y = v_increment_21)[name = string("V_new_21_cast_fp16")]; tensor var_6006_axes_0 = const()[name = string("op_6006_axes_0"), val = tensor([0])]; tensor var_6006_cast_fp16 = squeeze(axes = var_6006_axes_0, x = K_new_21_cast_fp16)[name = string("op_6006_cast_fp16")]; tensor concat_40 = const()[name = string("concat_40"), val = tensor([10, 0, 0, 0])]; tensor concat_41 = const()[name = string("concat_41"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_21_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_21_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_21_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_40, begin_mask = kv_cache_0_internal_tensor_assign_21_begin_mask_0, end = concat_41, end_mask = kv_cache_0_internal_tensor_assign_21_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_21_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_21_stride_0, update = var_6006_cast_fp16, x = coreml_update_state_55)[name = string("kv_cache_0_internal_tensor_assign_21_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_21_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_56_write_state")]; tensor coreml_update_state_56 = read_state(input = kv_cache_0)[name = string("coreml_update_state_56")]; tensor var_6013_axes_0 = const()[name = string("op_6013_axes_0"), val = tensor([0])]; tensor var_6013_cast_fp16 = squeeze(axes = var_6013_axes_0, x = V_new_21_cast_fp16)[name = string("op_6013_cast_fp16")]; tensor concat_42 = const()[name = string("concat_42"), val = tensor([28, 0, 0, 0])]; tensor concat_43 = const()[name = string("concat_43"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_22_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_22_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_22_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_42, begin_mask = kv_cache_0_internal_tensor_assign_22_begin_mask_0, end = concat_43, end_mask = kv_cache_0_internal_tensor_assign_22_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_22_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_22_stride_0, update = var_6013_cast_fp16, x = coreml_update_state_56)[name = string("kv_cache_0_internal_tensor_assign_22_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_22_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_57_write_state")]; tensor coreml_update_state_57 = read_state(input = kv_cache_0)[name = string("coreml_update_state_57")]; tensor hidden_states_81_axes_0 = const()[name = string("hidden_states_81_axes_0"), val = tensor([2])]; tensor hidden_states_81_cast_fp16 = expand_dims(axes = hidden_states_81_axes_0, x = K_new_21_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; tensor var_6026 = const()[name = string("op_6026"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_83_cast_fp16 = tile(reps = var_6026, x = hidden_states_81_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; tensor var_6032 = const()[name = string("op_6032"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_21_cast_fp16 = reshape(shape = var_6032, x = hidden_states_83_cast_fp16)[name = string("K_expanded_21_cast_fp16")]; tensor hidden_states_85_axes_0 = const()[name = string("hidden_states_85_axes_0"), val = tensor([2])]; tensor hidden_states_85_cast_fp16 = expand_dims(axes = hidden_states_85_axes_0, x = V_new_21_cast_fp16)[name = string("hidden_states_85_cast_fp16")]; tensor var_6041 = const()[name = string("op_6041"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_87_cast_fp16 = tile(reps = var_6041, x = hidden_states_85_cast_fp16)[name = string("hidden_states_87_cast_fp16")]; tensor var_6047 = const()[name = string("op_6047"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_21_cast_fp16 = reshape(shape = var_6047, x = hidden_states_87_cast_fp16)[name = string("V_expanded_21_cast_fp16")]; bool var_6062_transpose_x_1 = const()[name = string("op_6062_transpose_x_1"), val = bool(false)]; bool var_6062_transpose_y_1 = const()[name = string("op_6062_transpose_y_1"), val = bool(true)]; tensor var_6062_cast_fp16 = matmul(transpose_x = var_6062_transpose_x_1, transpose_y = var_6062_transpose_y_1, x = q_65, y = K_expanded_21_cast_fp16)[name = string("op_6062_cast_fp16")]; fp16 var_6063_to_fp16 = const()[name = string("op_6063_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_61_cast_fp16 = mul(x = var_6062_cast_fp16, y = var_6063_to_fp16)[name = string("attn_weights_61_cast_fp16")]; tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = causal_mask)[name = string("attn_weights_63_cast_fp16")]; int32 var_6072 = const()[name = string("op_6072"), val = int32(-1)]; tensor var_6074_cast_fp16 = softmax(axis = var_6072, x = attn_weights_63_cast_fp16)[name = string("op_6074_cast_fp16")]; bool var_6090_transpose_x_0 = const()[name = string("op_6090_transpose_x_0"), val = bool(false)]; bool var_6090_transpose_y_0 = const()[name = string("op_6090_transpose_y_0"), val = bool(false)]; tensor var_6090_cast_fp16 = matmul(transpose_x = var_6090_transpose_x_0, transpose_y = var_6090_transpose_y_0, x = var_6074_cast_fp16, y = V_expanded_21_cast_fp16)[name = string("op_6090_cast_fp16")]; tensor var_6100 = const()[name = string("op_6100"), val = tensor([0, 2, 1, 3])]; tensor var_6107 = const()[name = string("op_6107"), val = tensor([1, 32, 1024])]; tensor var_6101 = transpose(perm = var_6100, x = var_6090_cast_fp16)[name = string("transpose_62")]; tensor attn_output_63 = reshape(shape = var_6107, x = var_6101)[name = string("attn_output_63")]; tensor var_6112 = const()[name = string("op_6112"), val = tensor([0, 2, 1])]; tensor squeeze_10_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267872896))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268528320))))[name = string("squeeze_10_quantized")]; string var_6128_pad_type_0 = const()[name = string("op_6128_pad_type_0"), val = string("valid")]; int32 var_6128_groups_0 = const()[name = string("op_6128_groups_0"), val = int32(1)]; tensor var_6128_strides_0 = const()[name = string("op_6128_strides_0"), val = tensor([1])]; tensor var_6128_pad_0 = const()[name = string("op_6128_pad_0"), val = tensor([0, 0])]; tensor var_6128_dilations_0 = const()[name = string("op_6128_dilations_0"), val = tensor([1])]; tensor var_6113 = transpose(perm = var_6112, x = attn_output_63)[name = string("transpose_61")]; tensor var_6128 = conv(dilations = var_6128_dilations_0, groups = var_6128_groups_0, pad = var_6128_pad_0, pad_type = var_6128_pad_type_0, strides = var_6128_strides_0, weight = squeeze_10_quantized, x = var_6113)[name = string("op_6128")]; tensor var_6132 = const()[name = string("op_6132"), val = tensor([0, 2, 1])]; int32 var_6139 = const()[name = string("op_6139"), val = int32(-1)]; fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_169 = transpose(perm = var_6132, x = var_6128)[name = string("transpose_60")]; tensor var_6145_cast_fp16 = mul(x = x_169, y = const_148_promoted_to_fp16)[name = string("op_6145_cast_fp16")]; bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; tensor input_211_cast_fp16 = concat(axis = var_6139, interleave = input_211_interleave_0, values = (x_169, var_6145_cast_fp16))[name = string("input_211_cast_fp16")]; tensor normed_295_axes_0 = const()[name = string("normed_295_axes_0"), val = tensor([-1])]; fp16 var_6137_to_fp16 = const()[name = string("op_6137_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_295_cast_fp16 = layer_norm(axes = normed_295_axes_0, epsilon = var_6137_to_fp16, x = input_211_cast_fp16)[name = string("normed_295_cast_fp16")]; tensor var_6150_split_sizes_0 = const()[name = string("op_6150_split_sizes_0"), val = tensor([640, 640])]; int32 var_6150_axis_0 = const()[name = string("op_6150_axis_0"), val = int32(-1)]; tensor var_6150_cast_fp16_0, tensor var_6150_cast_fp16_1 = split(axis = var_6150_axis_0, split_sizes = var_6150_split_sizes_0, x = normed_295_cast_fp16)[name = string("op_6150_cast_fp16")]; tensor var_6154_to_fp16 = const()[name = string("op_6154_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268529664)))]; tensor out_127_cast_fp16 = mul(x = var_6150_cast_fp16_0, y = var_6154_to_fp16)[name = string("out_127_cast_fp16")]; tensor x_171_cast_fp16 = add(x = x_161_cast_fp16, y = out_127_cast_fp16)[name = string("x_171_cast_fp16")]; int32 var_6168 = const()[name = string("op_6168"), val = int32(-1)]; fp16 const_150_promoted_to_fp16 = const()[name = string("const_150_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6174_cast_fp16 = mul(x = x_171_cast_fp16, y = const_150_promoted_to_fp16)[name = string("op_6174_cast_fp16")]; bool input_213_interleave_0 = const()[name = string("input_213_interleave_0"), val = bool(false)]; tensor input_213_cast_fp16 = concat(axis = var_6168, interleave = input_213_interleave_0, values = (x_171_cast_fp16, var_6174_cast_fp16))[name = string("input_213_cast_fp16")]; tensor normed_299_axes_0 = const()[name = string("normed_299_axes_0"), val = tensor([-1])]; fp16 var_6166_to_fp16 = const()[name = string("op_6166_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_299_cast_fp16 = layer_norm(axes = normed_299_axes_0, epsilon = var_6166_to_fp16, x = input_213_cast_fp16)[name = string("normed_299_cast_fp16")]; tensor var_6179_split_sizes_0 = const()[name = string("op_6179_split_sizes_0"), val = tensor([640, 640])]; int32 var_6179_axis_0 = const()[name = string("op_6179_axis_0"), val = int32(-1)]; tensor var_6179_cast_fp16_0, tensor var_6179_cast_fp16_1 = split(axis = var_6179_axis_0, split_sizes = var_6179_split_sizes_0, x = normed_299_cast_fp16)[name = string("op_6179_cast_fp16")]; tensor var_6183_to_fp16 = const()[name = string("op_6183_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268531008)))]; tensor out_129_cast_fp16 = mul(x = var_6179_cast_fp16_0, y = var_6183_to_fp16)[name = string("out_129_cast_fp16")]; tensor var_6197 = const()[name = string("op_6197"), val = tensor([0, 2, 1])]; tensor input_215_axes_0 = const()[name = string("input_215_axes_0"), val = tensor([2])]; tensor var_6198 = transpose(perm = var_6197, x = out_129_cast_fp16)[name = string("transpose_59")]; tensor input_215 = expand_dims(axes = input_215_axes_0, x = var_6198)[name = string("input_215")]; string gate_41_pad_type_0 = const()[name = string("gate_41_pad_type_0"), val = string("valid")]; tensor gate_41_strides_0 = const()[name = string("gate_41_strides_0"), val = tensor([1, 1])]; tensor gate_41_pad_0 = const()[name = string("gate_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_41_dilations_0 = const()[name = string("gate_41_dilations_0"), val = tensor([1, 1])]; int32 gate_41_groups_0 = const()[name = string("gate_41_groups_0"), val = int32(1)]; tensor gate_41 = conv(dilations = gate_41_dilations_0, groups = gate_41_groups_0, pad = gate_41_pad_0, pad_type = gate_41_pad_type_0, strides = gate_41_strides_0, weight = layers_10_mlp_gate_proj_weight_quantized, x = input_215)[name = string("gate_41")]; string up_21_pad_type_0 = const()[name = string("up_21_pad_type_0"), val = string("valid")]; tensor up_21_strides_0 = const()[name = string("up_21_strides_0"), val = tensor([1, 1])]; tensor up_21_pad_0 = const()[name = string("up_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_21_dilations_0 = const()[name = string("up_21_dilations_0"), val = tensor([1, 1])]; int32 up_21_groups_0 = const()[name = string("up_21_groups_0"), val = int32(1)]; tensor up_21 = conv(dilations = up_21_dilations_0, groups = up_21_groups_0, pad = up_21_pad_0, pad_type = up_21_pad_type_0, strides = up_21_strides_0, weight = layers_10_mlp_up_proj_weight_quantized, x = input_215)[name = string("up_21")]; string gate_43_mode_0 = const()[name = string("gate_43_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_43 = gelu(mode = gate_43_mode_0, x = gate_41)[name = string("gate_43")]; tensor input_217 = mul(x = gate_43, y = up_21)[name = string("input_217")]; string var_6236_pad_type_0 = const()[name = string("op_6236_pad_type_0"), val = string("valid")]; tensor var_6236_strides_0 = const()[name = string("op_6236_strides_0"), val = tensor([1, 1])]; tensor var_6236_pad_0 = const()[name = string("op_6236_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6236_dilations_0 = const()[name = string("op_6236_dilations_0"), val = tensor([1, 1])]; int32 var_6236_groups_0 = const()[name = string("op_6236_groups_0"), val = int32(1)]; tensor var_6236 = conv(dilations = var_6236_dilations_0, groups = var_6236_groups_0, pad = var_6236_pad_0, pad_type = var_6236_pad_type_0, strides = var_6236_strides_0, weight = layers_10_mlp_down_proj_weight_quantized, x = input_217)[name = string("op_6236")]; tensor var_6238_axes_0 = const()[name = string("op_6238_axes_0"), val = tensor([2])]; tensor var_6238 = squeeze(axes = var_6238_axes_0, x = var_6236)[name = string("op_6238")]; tensor var_6242 = const()[name = string("op_6242"), val = tensor([0, 2, 1])]; int32 var_6249 = const()[name = string("op_6249"), val = int32(-1)]; fp16 const_152_promoted_to_fp16 = const()[name = string("const_152_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_175 = transpose(perm = var_6242, x = var_6238)[name = string("transpose_58")]; tensor var_6255_cast_fp16 = mul(x = x_175, y = const_152_promoted_to_fp16)[name = string("op_6255_cast_fp16")]; bool input_219_interleave_0 = const()[name = string("input_219_interleave_0"), val = bool(false)]; tensor input_219_cast_fp16 = concat(axis = var_6249, interleave = input_219_interleave_0, values = (x_175, var_6255_cast_fp16))[name = string("input_219_cast_fp16")]; tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; fp16 var_6247_to_fp16 = const()[name = string("op_6247_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_6247_to_fp16, x = input_219_cast_fp16)[name = string("normed_305_cast_fp16")]; tensor var_6260_split_sizes_0 = const()[name = string("op_6260_split_sizes_0"), val = tensor([640, 640])]; int32 var_6260_axis_0 = const()[name = string("op_6260_axis_0"), val = int32(-1)]; tensor var_6260_cast_fp16_0, tensor var_6260_cast_fp16_1 = split(axis = var_6260_axis_0, split_sizes = var_6260_split_sizes_0, x = normed_305_cast_fp16)[name = string("op_6260_cast_fp16")]; tensor var_6264_to_fp16 = const()[name = string("op_6264_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268532352)))]; tensor out_131_cast_fp16 = mul(x = var_6260_cast_fp16_0, y = var_6264_to_fp16)[name = string("out_131_cast_fp16")]; tensor x_177_cast_fp16 = add(x = x_171_cast_fp16, y = out_131_cast_fp16)[name = string("x_177_cast_fp16")]; int32 var_6278 = const()[name = string("op_6278"), val = int32(-1)]; fp16 const_154_promoted_to_fp16 = const()[name = string("const_154_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6284_cast_fp16 = mul(x = x_177_cast_fp16, y = const_154_promoted_to_fp16)[name = string("op_6284_cast_fp16")]; bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; tensor input_221_cast_fp16 = concat(axis = var_6278, interleave = input_221_interleave_0, values = (x_177_cast_fp16, var_6284_cast_fp16))[name = string("input_221_cast_fp16")]; tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; fp16 var_6276_to_fp16 = const()[name = string("op_6276_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_6276_to_fp16, x = input_221_cast_fp16)[name = string("normed_309_cast_fp16")]; tensor var_6289_split_sizes_0 = const()[name = string("op_6289_split_sizes_0"), val = tensor([640, 640])]; int32 var_6289_axis_0 = const()[name = string("op_6289_axis_0"), val = int32(-1)]; tensor var_6289_cast_fp16_0, tensor var_6289_cast_fp16_1 = split(axis = var_6289_axis_0, split_sizes = var_6289_split_sizes_0, x = normed_309_cast_fp16)[name = string("op_6289_cast_fp16")]; tensor var_6293_to_fp16 = const()[name = string("op_6293_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268533696)))]; tensor out_133_cast_fp16 = mul(x = var_6289_cast_fp16_0, y = var_6293_to_fp16)[name = string("out_133_cast_fp16")]; tensor var_6307 = const()[name = string("op_6307"), val = tensor([0, 2, 1])]; tensor input_223_axes_0 = const()[name = string("input_223_axes_0"), val = tensor([2])]; tensor var_6308 = transpose(perm = var_6307, x = out_133_cast_fp16)[name = string("transpose_57")]; tensor input_223 = expand_dims(axes = input_223_axes_0, x = var_6308)[name = string("input_223")]; string var_6321_pad_type_0 = const()[name = string("op_6321_pad_type_0"), val = string("valid")]; tensor var_6321_strides_0 = const()[name = string("op_6321_strides_0"), val = tensor([1, 1])]; tensor var_6321_pad_0 = const()[name = string("op_6321_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6321_dilations_0 = const()[name = string("op_6321_dilations_0"), val = tensor([1, 1])]; int32 var_6321_groups_0 = const()[name = string("op_6321_groups_0"), val = int32(1)]; tensor var_6321 = conv(dilations = var_6321_dilations_0, groups = var_6321_groups_0, pad = var_6321_pad_0, pad_type = var_6321_pad_type_0, strides = var_6321_strides_0, weight = layers_11_self_attn_q_proj_weight_quantized, x = input_223)[name = string("op_6321")]; tensor var_6326 = const()[name = string("op_6326"), val = tensor([1, 4, 256, 32])]; tensor var_6327 = reshape(shape = var_6326, x = var_6321)[name = string("op_6327")]; tensor var_6332 = const()[name = string("op_6332"), val = tensor([0, 1, 3, 2])]; int32 var_6345 = const()[name = string("op_6345"), val = int32(-1)]; fp16 const_156_promoted_to_fp16 = const()[name = string("const_156_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_67 = transpose(perm = var_6332, x = var_6327)[name = string("transpose_56")]; tensor var_6351_cast_fp16 = mul(x = q_67, y = const_156_promoted_to_fp16)[name = string("op_6351_cast_fp16")]; bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)]; tensor input_225_cast_fp16 = concat(axis = var_6345, interleave = input_225_interleave_0, values = (q_67, var_6351_cast_fp16))[name = string("input_225_cast_fp16")]; tensor normed_315_axes_0 = const()[name = string("normed_315_axes_0"), val = tensor([-1])]; fp16 var_6343_to_fp16 = const()[name = string("op_6343_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_315_cast_fp16 = layer_norm(axes = normed_315_axes_0, epsilon = var_6343_to_fp16, x = input_225_cast_fp16)[name = string("normed_315_cast_fp16")]; tensor var_6356_split_sizes_0 = const()[name = string("op_6356_split_sizes_0"), val = tensor([256, 256])]; int32 var_6356_axis_0 = const()[name = string("op_6356_axis_0"), val = int32(-1)]; tensor var_6356_cast_fp16_0, tensor var_6356_cast_fp16_1 = split(axis = var_6356_axis_0, split_sizes = var_6356_split_sizes_0, x = normed_315_cast_fp16)[name = string("op_6356_cast_fp16")]; tensor var_6360_to_fp16 = const()[name = string("op_6360_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268535040)))]; tensor out_135_cast_fp16 = mul(x = var_6356_cast_fp16_0, y = var_6360_to_fp16)[name = string("out_135_cast_fp16")]; string var_6373_pad_type_0 = const()[name = string("op_6373_pad_type_0"), val = string("valid")]; tensor var_6373_strides_0 = const()[name = string("op_6373_strides_0"), val = tensor([1, 1])]; tensor var_6373_pad_0 = const()[name = string("op_6373_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6373_dilations_0 = const()[name = string("op_6373_dilations_0"), val = tensor([1, 1])]; int32 var_6373_groups_0 = const()[name = string("op_6373_groups_0"), val = int32(1)]; tensor var_6373 = conv(dilations = var_6373_dilations_0, groups = var_6373_groups_0, pad = var_6373_pad_0, pad_type = var_6373_pad_type_0, strides = var_6373_strides_0, weight = layers_11_self_attn_k_proj_weight_quantized, x = input_223)[name = string("op_6373")]; tensor var_6378 = const()[name = string("op_6378"), val = tensor([1, 1, 256, 32])]; tensor var_6379 = reshape(shape = var_6378, x = var_6373)[name = string("op_6379")]; tensor var_6384 = const()[name = string("op_6384"), val = tensor([0, 1, 3, 2])]; int32 var_6397 = const()[name = string("op_6397"), val = int32(-1)]; fp16 const_158_promoted_to_fp16 = const()[name = string("const_158_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_67 = transpose(perm = var_6384, x = var_6379)[name = string("transpose_55")]; tensor var_6403_cast_fp16 = mul(x = k_67, y = const_158_promoted_to_fp16)[name = string("op_6403_cast_fp16")]; bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; tensor input_227_cast_fp16 = concat(axis = var_6397, interleave = input_227_interleave_0, values = (k_67, var_6403_cast_fp16))[name = string("input_227_cast_fp16")]; tensor normed_319_axes_0 = const()[name = string("normed_319_axes_0"), val = tensor([-1])]; fp16 var_6395_to_fp16 = const()[name = string("op_6395_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_319_cast_fp16 = layer_norm(axes = normed_319_axes_0, epsilon = var_6395_to_fp16, x = input_227_cast_fp16)[name = string("normed_319_cast_fp16")]; tensor var_6408_split_sizes_0 = const()[name = string("op_6408_split_sizes_0"), val = tensor([256, 256])]; int32 var_6408_axis_0 = const()[name = string("op_6408_axis_0"), val = int32(-1)]; tensor var_6408_cast_fp16_0, tensor var_6408_cast_fp16_1 = split(axis = var_6408_axis_0, split_sizes = var_6408_split_sizes_0, x = normed_319_cast_fp16)[name = string("op_6408_cast_fp16")]; tensor var_6412_to_fp16 = const()[name = string("op_6412_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268535616)))]; tensor out_137_cast_fp16 = mul(x = var_6408_cast_fp16_0, y = var_6412_to_fp16)[name = string("out_137_cast_fp16")]; string var_6425_pad_type_0 = const()[name = string("op_6425_pad_type_0"), val = string("valid")]; tensor var_6425_strides_0 = const()[name = string("op_6425_strides_0"), val = tensor([1, 1])]; tensor var_6425_pad_0 = const()[name = string("op_6425_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6425_dilations_0 = const()[name = string("op_6425_dilations_0"), val = tensor([1, 1])]; int32 var_6425_groups_0 = const()[name = string("op_6425_groups_0"), val = int32(1)]; tensor var_6425 = conv(dilations = var_6425_dilations_0, groups = var_6425_groups_0, pad = var_6425_pad_0, pad_type = var_6425_pad_type_0, strides = var_6425_strides_0, weight = layers_11_self_attn_v_proj_weight_quantized, x = input_223)[name = string("op_6425")]; tensor var_6430 = const()[name = string("op_6430"), val = tensor([1, 1, 256, 32])]; tensor var_6431 = reshape(shape = var_6430, x = var_6425)[name = string("op_6431")]; tensor var_6438 = mul(x = out_135_cast_fp16, y = cos)[name = string("op_6438")]; tensor var_6439_split_sizes_0 = const()[name = string("op_6439_split_sizes_0"), val = tensor([128, 128])]; int32 var_6439_axis_0 = const()[name = string("op_6439_axis_0"), val = int32(-1)]; tensor var_6439_0, tensor var_6439_1 = split(axis = var_6439_axis_0, split_sizes = var_6439_split_sizes_0, x = out_135_cast_fp16)[name = string("op_6439")]; fp16 const_160_promoted = const()[name = string("const_160_promoted"), val = fp16(-0x1p+0)]; tensor var_6441 = mul(x = var_6439_1, y = const_160_promoted)[name = string("op_6441")]; int32 var_6443 = const()[name = string("op_6443"), val = int32(-1)]; bool var_6444_interleave_0 = const()[name = string("op_6444_interleave_0"), val = bool(false)]; tensor var_6444 = concat(axis = var_6443, interleave = var_6444_interleave_0, values = (var_6441, var_6439_0))[name = string("op_6444")]; tensor var_6445 = mul(x = var_6444, y = sin)[name = string("op_6445")]; tensor q_71 = add(x = var_6438, y = var_6445)[name = string("q_71")]; tensor var_6448 = mul(x = out_137_cast_fp16, y = cos)[name = string("op_6448")]; tensor var_6449_split_sizes_0 = const()[name = string("op_6449_split_sizes_0"), val = tensor([128, 128])]; int32 var_6449_axis_0 = const()[name = string("op_6449_axis_0"), val = int32(-1)]; tensor var_6449_0, tensor var_6449_1 = split(axis = var_6449_axis_0, split_sizes = var_6449_split_sizes_0, x = out_137_cast_fp16)[name = string("op_6449")]; fp16 const_161_promoted = const()[name = string("const_161_promoted"), val = fp16(-0x1p+0)]; tensor var_6451 = mul(x = var_6449_1, y = const_161_promoted)[name = string("op_6451")]; int32 var_6453 = const()[name = string("op_6453"), val = int32(-1)]; bool var_6454_interleave_0 = const()[name = string("op_6454_interleave_0"), val = bool(false)]; tensor var_6454 = concat(axis = var_6453, interleave = var_6454_interleave_0, values = (var_6451, var_6449_0))[name = string("op_6454")]; tensor var_6455 = mul(x = var_6454, y = sin)[name = string("op_6455")]; tensor k_71 = add(x = var_6448, y = var_6455)[name = string("k_71")]; tensor var_6460_begin_0 = const()[name = string("op_6460_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_6460_end_0 = const()[name = string("op_6460_end_0"), val = tensor([12, 1, 2048, 256])]; tensor var_6460_end_mask_0 = const()[name = string("op_6460_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6460_squeeze_mask_0 = const()[name = string("op_6460_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_6460_cast_fp16 = slice_by_index(begin = var_6460_begin_0, end = var_6460_end_0, end_mask = var_6460_end_mask_0, squeeze_mask = var_6460_squeeze_mask_0, x = coreml_update_state_57)[name = string("op_6460_cast_fp16")]; tensor K_cache_23_axes_0 = const()[name = string("K_cache_23_axes_0"), val = tensor([0])]; tensor K_cache_23_cast_fp16 = expand_dims(axes = K_cache_23_axes_0, x = var_6460_cast_fp16)[name = string("K_cache_23_cast_fp16")]; tensor var_6465_begin_0 = const()[name = string("op_6465_begin_0"), val = tensor([29, 0, 0, 0])]; tensor var_6465_end_0 = const()[name = string("op_6465_end_0"), val = tensor([30, 1, 2048, 256])]; tensor var_6465_end_mask_0 = const()[name = string("op_6465_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6465_squeeze_mask_0 = const()[name = string("op_6465_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_6465_cast_fp16 = slice_by_index(begin = var_6465_begin_0, end = var_6465_end_0, end_mask = var_6465_end_mask_0, squeeze_mask = var_6465_squeeze_mask_0, x = coreml_update_state_57)[name = string("op_6465_cast_fp16")]; tensor V_cache_23_axes_0 = const()[name = string("V_cache_23_axes_0"), val = tensor([0])]; tensor V_cache_23_cast_fp16 = expand_dims(axes = V_cache_23_axes_0, x = var_6465_cast_fp16)[name = string("V_cache_23_cast_fp16")]; bool k_increment_23_transpose_x_0 = const()[name = string("k_increment_23_transpose_x_0"), val = bool(false)]; bool k_increment_23_transpose_y_0 = const()[name = string("k_increment_23_transpose_y_0"), val = bool(false)]; tensor k_increment_23 = matmul(transpose_x = k_increment_23_transpose_x_0, transpose_y = k_increment_23_transpose_y_0, x = update_mask, y = k_71)[name = string("k_increment_23")]; bool v_increment_23_transpose_x_1 = const()[name = string("v_increment_23_transpose_x_1"), val = bool(false)]; bool v_increment_23_transpose_y_1 = const()[name = string("v_increment_23_transpose_y_1"), val = bool(true)]; tensor v_increment_23 = matmul(transpose_x = v_increment_23_transpose_x_1, transpose_y = v_increment_23_transpose_y_1, x = update_mask, y = var_6431)[name = string("v_increment_23")]; tensor var_6483_cast_fp16 = mul(x = K_cache_23_cast_fp16, y = var_1125_cast_fp16)[name = string("op_6483_cast_fp16")]; tensor K_new_23_cast_fp16 = add(x = var_6483_cast_fp16, y = k_increment_23)[name = string("K_new_23_cast_fp16")]; tensor var_6489_cast_fp16 = mul(x = V_cache_23_cast_fp16, y = var_1125_cast_fp16)[name = string("op_6489_cast_fp16")]; tensor V_new_23_cast_fp16 = add(x = var_6489_cast_fp16, y = v_increment_23)[name = string("V_new_23_cast_fp16")]; tensor var_6493_axes_0 = const()[name = string("op_6493_axes_0"), val = tensor([0])]; tensor var_6493_cast_fp16 = squeeze(axes = var_6493_axes_0, x = K_new_23_cast_fp16)[name = string("op_6493_cast_fp16")]; tensor concat_44 = const()[name = string("concat_44"), val = tensor([11, 0, 0, 0])]; tensor concat_45 = const()[name = string("concat_45"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_23_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_23_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_23_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_44, begin_mask = kv_cache_0_internal_tensor_assign_23_begin_mask_0, end = concat_45, end_mask = kv_cache_0_internal_tensor_assign_23_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_23_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_23_stride_0, update = var_6493_cast_fp16, x = coreml_update_state_57)[name = string("kv_cache_0_internal_tensor_assign_23_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_23_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_58_write_state")]; tensor coreml_update_state_58 = read_state(input = kv_cache_0)[name = string("coreml_update_state_58")]; tensor var_6500_axes_0 = const()[name = string("op_6500_axes_0"), val = tensor([0])]; tensor var_6500_cast_fp16 = squeeze(axes = var_6500_axes_0, x = V_new_23_cast_fp16)[name = string("op_6500_cast_fp16")]; tensor concat_46 = const()[name = string("concat_46"), val = tensor([29, 0, 0, 0])]; tensor concat_47 = const()[name = string("concat_47"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_24_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_24_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_24_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_46, begin_mask = kv_cache_0_internal_tensor_assign_24_begin_mask_0, end = concat_47, end_mask = kv_cache_0_internal_tensor_assign_24_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_24_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_24_stride_0, update = var_6500_cast_fp16, x = coreml_update_state_58)[name = string("kv_cache_0_internal_tensor_assign_24_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_24_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_59_write_state")]; tensor coreml_update_state_59 = read_state(input = kv_cache_0)[name = string("coreml_update_state_59")]; tensor hidden_states_89_axes_0 = const()[name = string("hidden_states_89_axes_0"), val = tensor([2])]; tensor hidden_states_89_cast_fp16 = expand_dims(axes = hidden_states_89_axes_0, x = K_new_23_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; tensor var_6513 = const()[name = string("op_6513"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_91_cast_fp16 = tile(reps = var_6513, x = hidden_states_89_cast_fp16)[name = string("hidden_states_91_cast_fp16")]; tensor var_6519 = const()[name = string("op_6519"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_23_cast_fp16 = reshape(shape = var_6519, x = hidden_states_91_cast_fp16)[name = string("K_expanded_23_cast_fp16")]; tensor hidden_states_93_axes_0 = const()[name = string("hidden_states_93_axes_0"), val = tensor([2])]; tensor hidden_states_93_cast_fp16 = expand_dims(axes = hidden_states_93_axes_0, x = V_new_23_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; tensor var_6528 = const()[name = string("op_6528"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_95_cast_fp16 = tile(reps = var_6528, x = hidden_states_93_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; tensor var_6534 = const()[name = string("op_6534"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_23_cast_fp16 = reshape(shape = var_6534, x = hidden_states_95_cast_fp16)[name = string("V_expanded_23_cast_fp16")]; bool var_6549_transpose_x_1 = const()[name = string("op_6549_transpose_x_1"), val = bool(false)]; bool var_6549_transpose_y_1 = const()[name = string("op_6549_transpose_y_1"), val = bool(true)]; tensor var_6549_cast_fp16 = matmul(transpose_x = var_6549_transpose_x_1, transpose_y = var_6549_transpose_y_1, x = q_71, y = K_expanded_23_cast_fp16)[name = string("op_6549_cast_fp16")]; fp16 var_6550_to_fp16 = const()[name = string("op_6550_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_67_cast_fp16 = mul(x = var_6549_cast_fp16, y = var_6550_to_fp16)[name = string("attn_weights_67_cast_fp16")]; tensor attn_weights_69_cast_fp16 = add(x = attn_weights_67_cast_fp16, y = causal_mask)[name = string("attn_weights_69_cast_fp16")]; int32 var_6559 = const()[name = string("op_6559"), val = int32(-1)]; tensor var_6561_cast_fp16 = softmax(axis = var_6559, x = attn_weights_69_cast_fp16)[name = string("op_6561_cast_fp16")]; bool var_6577_transpose_x_0 = const()[name = string("op_6577_transpose_x_0"), val = bool(false)]; bool var_6577_transpose_y_0 = const()[name = string("op_6577_transpose_y_0"), val = bool(false)]; tensor var_6577_cast_fp16 = matmul(transpose_x = var_6577_transpose_x_0, transpose_y = var_6577_transpose_y_0, x = var_6561_cast_fp16, y = V_expanded_23_cast_fp16)[name = string("op_6577_cast_fp16")]; tensor var_6587 = const()[name = string("op_6587"), val = tensor([0, 2, 1, 3])]; tensor var_6594 = const()[name = string("op_6594"), val = tensor([1, 32, 1024])]; tensor var_6588 = transpose(perm = var_6587, x = var_6577_cast_fp16)[name = string("transpose_54")]; tensor attn_output_69 = reshape(shape = var_6594, x = var_6588)[name = string("attn_output_69")]; tensor var_6599 = const()[name = string("op_6599"), val = tensor([0, 2, 1])]; tensor squeeze_11_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268536192))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269191616))))[name = string("squeeze_11_quantized")]; string var_6615_pad_type_0 = const()[name = string("op_6615_pad_type_0"), val = string("valid")]; int32 var_6615_groups_0 = const()[name = string("op_6615_groups_0"), val = int32(1)]; tensor var_6615_strides_0 = const()[name = string("op_6615_strides_0"), val = tensor([1])]; tensor var_6615_pad_0 = const()[name = string("op_6615_pad_0"), val = tensor([0, 0])]; tensor var_6615_dilations_0 = const()[name = string("op_6615_dilations_0"), val = tensor([1])]; tensor var_6600 = transpose(perm = var_6599, x = attn_output_69)[name = string("transpose_53")]; tensor var_6615 = conv(dilations = var_6615_dilations_0, groups = var_6615_groups_0, pad = var_6615_pad_0, pad_type = var_6615_pad_type_0, strides = var_6615_strides_0, weight = squeeze_11_quantized, x = var_6600)[name = string("op_6615")]; tensor var_6619 = const()[name = string("op_6619"), val = tensor([0, 2, 1])]; int32 var_6626 = const()[name = string("op_6626"), val = int32(-1)]; fp16 const_162_promoted_to_fp16 = const()[name = string("const_162_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_185 = transpose(perm = var_6619, x = var_6615)[name = string("transpose_52")]; tensor var_6632_cast_fp16 = mul(x = x_185, y = const_162_promoted_to_fp16)[name = string("op_6632_cast_fp16")]; bool input_231_interleave_0 = const()[name = string("input_231_interleave_0"), val = bool(false)]; tensor input_231_cast_fp16 = concat(axis = var_6626, interleave = input_231_interleave_0, values = (x_185, var_6632_cast_fp16))[name = string("input_231_cast_fp16")]; tensor normed_323_axes_0 = const()[name = string("normed_323_axes_0"), val = tensor([-1])]; fp16 var_6624_to_fp16 = const()[name = string("op_6624_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_323_cast_fp16 = layer_norm(axes = normed_323_axes_0, epsilon = var_6624_to_fp16, x = input_231_cast_fp16)[name = string("normed_323_cast_fp16")]; tensor var_6637_split_sizes_0 = const()[name = string("op_6637_split_sizes_0"), val = tensor([640, 640])]; int32 var_6637_axis_0 = const()[name = string("op_6637_axis_0"), val = int32(-1)]; tensor var_6637_cast_fp16_0, tensor var_6637_cast_fp16_1 = split(axis = var_6637_axis_0, split_sizes = var_6637_split_sizes_0, x = normed_323_cast_fp16)[name = string("op_6637_cast_fp16")]; tensor var_6641_to_fp16 = const()[name = string("op_6641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269192960)))]; tensor out_139_cast_fp16 = mul(x = var_6637_cast_fp16_0, y = var_6641_to_fp16)[name = string("out_139_cast_fp16")]; tensor x_187_cast_fp16 = add(x = x_177_cast_fp16, y = out_139_cast_fp16)[name = string("x_187_cast_fp16")]; int32 var_6655 = const()[name = string("op_6655"), val = int32(-1)]; fp16 const_164_promoted_to_fp16 = const()[name = string("const_164_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6661_cast_fp16 = mul(x = x_187_cast_fp16, y = const_164_promoted_to_fp16)[name = string("op_6661_cast_fp16")]; bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)]; tensor input_233_cast_fp16 = concat(axis = var_6655, interleave = input_233_interleave_0, values = (x_187_cast_fp16, var_6661_cast_fp16))[name = string("input_233_cast_fp16")]; tensor normed_327_axes_0 = const()[name = string("normed_327_axes_0"), val = tensor([-1])]; fp16 var_6653_to_fp16 = const()[name = string("op_6653_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_327_cast_fp16 = layer_norm(axes = normed_327_axes_0, epsilon = var_6653_to_fp16, x = input_233_cast_fp16)[name = string("normed_327_cast_fp16")]; tensor var_6666_split_sizes_0 = const()[name = string("op_6666_split_sizes_0"), val = tensor([640, 640])]; int32 var_6666_axis_0 = const()[name = string("op_6666_axis_0"), val = int32(-1)]; tensor var_6666_cast_fp16_0, tensor var_6666_cast_fp16_1 = split(axis = var_6666_axis_0, split_sizes = var_6666_split_sizes_0, x = normed_327_cast_fp16)[name = string("op_6666_cast_fp16")]; tensor var_6670_to_fp16 = const()[name = string("op_6670_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269194304)))]; tensor out_141_cast_fp16 = mul(x = var_6666_cast_fp16_0, y = var_6670_to_fp16)[name = string("out_141_cast_fp16")]; tensor var_6684 = const()[name = string("op_6684"), val = tensor([0, 2, 1])]; tensor input_235_axes_0 = const()[name = string("input_235_axes_0"), val = tensor([2])]; tensor var_6685 = transpose(perm = var_6684, x = out_141_cast_fp16)[name = string("transpose_51")]; tensor input_235 = expand_dims(axes = input_235_axes_0, x = var_6685)[name = string("input_235")]; string gate_45_pad_type_0 = const()[name = string("gate_45_pad_type_0"), val = string("valid")]; tensor gate_45_strides_0 = const()[name = string("gate_45_strides_0"), val = tensor([1, 1])]; tensor gate_45_pad_0 = const()[name = string("gate_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_45_dilations_0 = const()[name = string("gate_45_dilations_0"), val = tensor([1, 1])]; int32 gate_45_groups_0 = const()[name = string("gate_45_groups_0"), val = int32(1)]; tensor gate_45 = conv(dilations = gate_45_dilations_0, groups = gate_45_groups_0, pad = gate_45_pad_0, pad_type = gate_45_pad_type_0, strides = gate_45_strides_0, weight = layers_11_mlp_gate_proj_weight_quantized, x = input_235)[name = string("gate_45")]; string up_23_pad_type_0 = const()[name = string("up_23_pad_type_0"), val = string("valid")]; tensor up_23_strides_0 = const()[name = string("up_23_strides_0"), val = tensor([1, 1])]; tensor up_23_pad_0 = const()[name = string("up_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_23_dilations_0 = const()[name = string("up_23_dilations_0"), val = tensor([1, 1])]; int32 up_23_groups_0 = const()[name = string("up_23_groups_0"), val = int32(1)]; tensor up_23 = conv(dilations = up_23_dilations_0, groups = up_23_groups_0, pad = up_23_pad_0, pad_type = up_23_pad_type_0, strides = up_23_strides_0, weight = layers_11_mlp_up_proj_weight_quantized, x = input_235)[name = string("up_23")]; string gate_47_mode_0 = const()[name = string("gate_47_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_47 = gelu(mode = gate_47_mode_0, x = gate_45)[name = string("gate_47")]; tensor input_237 = mul(x = gate_47, y = up_23)[name = string("input_237")]; string var_6723_pad_type_0 = const()[name = string("op_6723_pad_type_0"), val = string("valid")]; tensor var_6723_strides_0 = const()[name = string("op_6723_strides_0"), val = tensor([1, 1])]; tensor var_6723_pad_0 = const()[name = string("op_6723_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6723_dilations_0 = const()[name = string("op_6723_dilations_0"), val = tensor([1, 1])]; int32 var_6723_groups_0 = const()[name = string("op_6723_groups_0"), val = int32(1)]; tensor var_6723 = conv(dilations = var_6723_dilations_0, groups = var_6723_groups_0, pad = var_6723_pad_0, pad_type = var_6723_pad_type_0, strides = var_6723_strides_0, weight = layers_11_mlp_down_proj_weight_quantized, x = input_237)[name = string("op_6723")]; tensor var_6725_axes_0 = const()[name = string("op_6725_axes_0"), val = tensor([2])]; tensor var_6725 = squeeze(axes = var_6725_axes_0, x = var_6723)[name = string("op_6725")]; tensor var_6729 = const()[name = string("op_6729"), val = tensor([0, 2, 1])]; int32 var_6736 = const()[name = string("op_6736"), val = int32(-1)]; fp16 const_166_promoted_to_fp16 = const()[name = string("const_166_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_191 = transpose(perm = var_6729, x = var_6725)[name = string("transpose_50")]; tensor var_6742_cast_fp16 = mul(x = x_191, y = const_166_promoted_to_fp16)[name = string("op_6742_cast_fp16")]; bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)]; tensor input_239_cast_fp16 = concat(axis = var_6736, interleave = input_239_interleave_0, values = (x_191, var_6742_cast_fp16))[name = string("input_239_cast_fp16")]; tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; fp16 var_6734_to_fp16 = const()[name = string("op_6734_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_6734_to_fp16, x = input_239_cast_fp16)[name = string("normed_333_cast_fp16")]; tensor var_6747_split_sizes_0 = const()[name = string("op_6747_split_sizes_0"), val = tensor([640, 640])]; int32 var_6747_axis_0 = const()[name = string("op_6747_axis_0"), val = int32(-1)]; tensor var_6747_cast_fp16_0, tensor var_6747_cast_fp16_1 = split(axis = var_6747_axis_0, split_sizes = var_6747_split_sizes_0, x = normed_333_cast_fp16)[name = string("op_6747_cast_fp16")]; tensor var_6751_to_fp16 = const()[name = string("op_6751_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269195648)))]; tensor out_143_cast_fp16 = mul(x = var_6747_cast_fp16_0, y = var_6751_to_fp16)[name = string("out_143_cast_fp16")]; tensor x_193_cast_fp16 = add(x = x_187_cast_fp16, y = out_143_cast_fp16)[name = string("x_193_cast_fp16")]; int32 var_6765 = const()[name = string("op_6765"), val = int32(-1)]; fp16 const_168_promoted_to_fp16 = const()[name = string("const_168_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6771_cast_fp16 = mul(x = x_193_cast_fp16, y = const_168_promoted_to_fp16)[name = string("op_6771_cast_fp16")]; bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; tensor input_241_cast_fp16 = concat(axis = var_6765, interleave = input_241_interleave_0, values = (x_193_cast_fp16, var_6771_cast_fp16))[name = string("input_241_cast_fp16")]; tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; fp16 var_6763_to_fp16 = const()[name = string("op_6763_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_6763_to_fp16, x = input_241_cast_fp16)[name = string("normed_337_cast_fp16")]; tensor var_6776_split_sizes_0 = const()[name = string("op_6776_split_sizes_0"), val = tensor([640, 640])]; int32 var_6776_axis_0 = const()[name = string("op_6776_axis_0"), val = int32(-1)]; tensor var_6776_cast_fp16_0, tensor var_6776_cast_fp16_1 = split(axis = var_6776_axis_0, split_sizes = var_6776_split_sizes_0, x = normed_337_cast_fp16)[name = string("op_6776_cast_fp16")]; tensor var_6780_to_fp16 = const()[name = string("op_6780_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269196992)))]; tensor out_145_cast_fp16 = mul(x = var_6776_cast_fp16_0, y = var_6780_to_fp16)[name = string("out_145_cast_fp16")]; tensor var_6794 = const()[name = string("op_6794"), val = tensor([0, 2, 1])]; tensor input_243_axes_0 = const()[name = string("input_243_axes_0"), val = tensor([2])]; tensor var_6795 = transpose(perm = var_6794, x = out_145_cast_fp16)[name = string("transpose_49")]; tensor input_243 = expand_dims(axes = input_243_axes_0, x = var_6795)[name = string("input_243")]; string var_6808_pad_type_0 = const()[name = string("op_6808_pad_type_0"), val = string("valid")]; tensor var_6808_strides_0 = const()[name = string("op_6808_strides_0"), val = tensor([1, 1])]; tensor var_6808_pad_0 = const()[name = string("op_6808_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6808_dilations_0 = const()[name = string("op_6808_dilations_0"), val = tensor([1, 1])]; int32 var_6808_groups_0 = const()[name = string("op_6808_groups_0"), val = int32(1)]; tensor var_6808 = conv(dilations = var_6808_dilations_0, groups = var_6808_groups_0, pad = var_6808_pad_0, pad_type = var_6808_pad_type_0, strides = var_6808_strides_0, weight = layers_12_self_attn_q_proj_weight_quantized, x = input_243)[name = string("op_6808")]; tensor var_6813 = const()[name = string("op_6813"), val = tensor([1, 4, 256, 32])]; tensor var_6814 = reshape(shape = var_6813, x = var_6808)[name = string("op_6814")]; tensor var_6819 = const()[name = string("op_6819"), val = tensor([0, 1, 3, 2])]; int32 var_6832 = const()[name = string("op_6832"), val = int32(-1)]; fp16 const_170_promoted_to_fp16 = const()[name = string("const_170_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_73 = transpose(perm = var_6819, x = var_6814)[name = string("transpose_48")]; tensor var_6838_cast_fp16 = mul(x = q_73, y = const_170_promoted_to_fp16)[name = string("op_6838_cast_fp16")]; bool input_245_interleave_0 = const()[name = string("input_245_interleave_0"), val = bool(false)]; tensor input_245_cast_fp16 = concat(axis = var_6832, interleave = input_245_interleave_0, values = (q_73, var_6838_cast_fp16))[name = string("input_245_cast_fp16")]; tensor normed_343_axes_0 = const()[name = string("normed_343_axes_0"), val = tensor([-1])]; fp16 var_6830_to_fp16 = const()[name = string("op_6830_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_343_cast_fp16 = layer_norm(axes = normed_343_axes_0, epsilon = var_6830_to_fp16, x = input_245_cast_fp16)[name = string("normed_343_cast_fp16")]; tensor var_6843_split_sizes_0 = const()[name = string("op_6843_split_sizes_0"), val = tensor([256, 256])]; int32 var_6843_axis_0 = const()[name = string("op_6843_axis_0"), val = int32(-1)]; tensor var_6843_cast_fp16_0, tensor var_6843_cast_fp16_1 = split(axis = var_6843_axis_0, split_sizes = var_6843_split_sizes_0, x = normed_343_cast_fp16)[name = string("op_6843_cast_fp16")]; tensor var_6847_to_fp16 = const()[name = string("op_6847_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269198336)))]; tensor out_147_cast_fp16 = mul(x = var_6843_cast_fp16_0, y = var_6847_to_fp16)[name = string("out_147_cast_fp16")]; string var_6860_pad_type_0 = const()[name = string("op_6860_pad_type_0"), val = string("valid")]; tensor var_6860_strides_0 = const()[name = string("op_6860_strides_0"), val = tensor([1, 1])]; tensor var_6860_pad_0 = const()[name = string("op_6860_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6860_dilations_0 = const()[name = string("op_6860_dilations_0"), val = tensor([1, 1])]; int32 var_6860_groups_0 = const()[name = string("op_6860_groups_0"), val = int32(1)]; tensor var_6860 = conv(dilations = var_6860_dilations_0, groups = var_6860_groups_0, pad = var_6860_pad_0, pad_type = var_6860_pad_type_0, strides = var_6860_strides_0, weight = layers_12_self_attn_k_proj_weight_quantized, x = input_243)[name = string("op_6860")]; tensor var_6865 = const()[name = string("op_6865"), val = tensor([1, 1, 256, 32])]; tensor var_6866 = reshape(shape = var_6865, x = var_6860)[name = string("op_6866")]; tensor var_6871 = const()[name = string("op_6871"), val = tensor([0, 1, 3, 2])]; int32 var_6884 = const()[name = string("op_6884"), val = int32(-1)]; fp16 const_172_promoted_to_fp16 = const()[name = string("const_172_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_73 = transpose(perm = var_6871, x = var_6866)[name = string("transpose_47")]; tensor var_6890_cast_fp16 = mul(x = k_73, y = const_172_promoted_to_fp16)[name = string("op_6890_cast_fp16")]; bool input_247_interleave_0 = const()[name = string("input_247_interleave_0"), val = bool(false)]; tensor input_247_cast_fp16 = concat(axis = var_6884, interleave = input_247_interleave_0, values = (k_73, var_6890_cast_fp16))[name = string("input_247_cast_fp16")]; tensor normed_347_axes_0 = const()[name = string("normed_347_axes_0"), val = tensor([-1])]; fp16 var_6882_to_fp16 = const()[name = string("op_6882_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_347_cast_fp16 = layer_norm(axes = normed_347_axes_0, epsilon = var_6882_to_fp16, x = input_247_cast_fp16)[name = string("normed_347_cast_fp16")]; tensor var_6895_split_sizes_0 = const()[name = string("op_6895_split_sizes_0"), val = tensor([256, 256])]; int32 var_6895_axis_0 = const()[name = string("op_6895_axis_0"), val = int32(-1)]; tensor var_6895_cast_fp16_0, tensor var_6895_cast_fp16_1 = split(axis = var_6895_axis_0, split_sizes = var_6895_split_sizes_0, x = normed_347_cast_fp16)[name = string("op_6895_cast_fp16")]; tensor var_6899_to_fp16 = const()[name = string("op_6899_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269198912)))]; tensor out_149_cast_fp16 = mul(x = var_6895_cast_fp16_0, y = var_6899_to_fp16)[name = string("out_149_cast_fp16")]; string var_6912_pad_type_0 = const()[name = string("op_6912_pad_type_0"), val = string("valid")]; tensor var_6912_strides_0 = const()[name = string("op_6912_strides_0"), val = tensor([1, 1])]; tensor var_6912_pad_0 = const()[name = string("op_6912_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6912_dilations_0 = const()[name = string("op_6912_dilations_0"), val = tensor([1, 1])]; int32 var_6912_groups_0 = const()[name = string("op_6912_groups_0"), val = int32(1)]; tensor var_6912 = conv(dilations = var_6912_dilations_0, groups = var_6912_groups_0, pad = var_6912_pad_0, pad_type = var_6912_pad_type_0, strides = var_6912_strides_0, weight = layers_12_self_attn_v_proj_weight_quantized, x = input_243)[name = string("op_6912")]; tensor var_6917 = const()[name = string("op_6917"), val = tensor([1, 1, 256, 32])]; tensor var_6918 = reshape(shape = var_6917, x = var_6912)[name = string("op_6918")]; tensor var_6925 = mul(x = out_147_cast_fp16, y = cos_1)[name = string("op_6925")]; tensor var_6926_split_sizes_0 = const()[name = string("op_6926_split_sizes_0"), val = tensor([128, 128])]; int32 var_6926_axis_0 = const()[name = string("op_6926_axis_0"), val = int32(-1)]; tensor var_6926_0, tensor var_6926_1 = split(axis = var_6926_axis_0, split_sizes = var_6926_split_sizes_0, x = out_147_cast_fp16)[name = string("op_6926")]; fp16 const_174_promoted = const()[name = string("const_174_promoted"), val = fp16(-0x1p+0)]; tensor var_6928 = mul(x = var_6926_1, y = const_174_promoted)[name = string("op_6928")]; int32 var_6930 = const()[name = string("op_6930"), val = int32(-1)]; bool var_6931_interleave_0 = const()[name = string("op_6931_interleave_0"), val = bool(false)]; tensor var_6931 = concat(axis = var_6930, interleave = var_6931_interleave_0, values = (var_6928, var_6926_0))[name = string("op_6931")]; tensor var_6932 = mul(x = var_6931, y = sin_1)[name = string("op_6932")]; tensor q_77 = add(x = var_6925, y = var_6932)[name = string("q_77")]; tensor var_6935 = mul(x = out_149_cast_fp16, y = cos_1)[name = string("op_6935")]; tensor var_6936_split_sizes_0 = const()[name = string("op_6936_split_sizes_0"), val = tensor([128, 128])]; int32 var_6936_axis_0 = const()[name = string("op_6936_axis_0"), val = int32(-1)]; tensor var_6936_0, tensor var_6936_1 = split(axis = var_6936_axis_0, split_sizes = var_6936_split_sizes_0, x = out_149_cast_fp16)[name = string("op_6936")]; fp16 const_175_promoted = const()[name = string("const_175_promoted"), val = fp16(-0x1p+0)]; tensor var_6938 = mul(x = var_6936_1, y = const_175_promoted)[name = string("op_6938")]; int32 var_6940 = const()[name = string("op_6940"), val = int32(-1)]; bool var_6941_interleave_0 = const()[name = string("op_6941_interleave_0"), val = bool(false)]; tensor var_6941 = concat(axis = var_6940, interleave = var_6941_interleave_0, values = (var_6938, var_6936_0))[name = string("op_6941")]; tensor var_6942 = mul(x = var_6941, y = sin_1)[name = string("op_6942")]; tensor k_77 = add(x = var_6935, y = var_6942)[name = string("k_77")]; tensor var_6947_begin_0 = const()[name = string("op_6947_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_6947_end_0 = const()[name = string("op_6947_end_0"), val = tensor([13, 1, 2048, 256])]; tensor var_6947_end_mask_0 = const()[name = string("op_6947_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6947_squeeze_mask_0 = const()[name = string("op_6947_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_6947_cast_fp16 = slice_by_index(begin = var_6947_begin_0, end = var_6947_end_0, end_mask = var_6947_end_mask_0, squeeze_mask = var_6947_squeeze_mask_0, x = coreml_update_state_59)[name = string("op_6947_cast_fp16")]; tensor K_cache_25_axes_0 = const()[name = string("K_cache_25_axes_0"), val = tensor([0])]; tensor K_cache_25_cast_fp16 = expand_dims(axes = K_cache_25_axes_0, x = var_6947_cast_fp16)[name = string("K_cache_25_cast_fp16")]; tensor var_6952_begin_0 = const()[name = string("op_6952_begin_0"), val = tensor([30, 0, 0, 0])]; tensor var_6952_end_0 = const()[name = string("op_6952_end_0"), val = tensor([31, 1, 2048, 256])]; tensor var_6952_end_mask_0 = const()[name = string("op_6952_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6952_squeeze_mask_0 = const()[name = string("op_6952_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_6952_cast_fp16 = slice_by_index(begin = var_6952_begin_0, end = var_6952_end_0, end_mask = var_6952_end_mask_0, squeeze_mask = var_6952_squeeze_mask_0, x = coreml_update_state_59)[name = string("op_6952_cast_fp16")]; tensor V_cache_25_axes_0 = const()[name = string("V_cache_25_axes_0"), val = tensor([0])]; tensor V_cache_25_cast_fp16 = expand_dims(axes = V_cache_25_axes_0, x = var_6952_cast_fp16)[name = string("V_cache_25_cast_fp16")]; bool k_increment_25_transpose_x_0 = const()[name = string("k_increment_25_transpose_x_0"), val = bool(false)]; bool k_increment_25_transpose_y_0 = const()[name = string("k_increment_25_transpose_y_0"), val = bool(false)]; tensor k_increment_25 = matmul(transpose_x = k_increment_25_transpose_x_0, transpose_y = k_increment_25_transpose_y_0, x = update_mask, y = k_77)[name = string("k_increment_25")]; bool v_increment_25_transpose_x_1 = const()[name = string("v_increment_25_transpose_x_1"), val = bool(false)]; bool v_increment_25_transpose_y_1 = const()[name = string("v_increment_25_transpose_y_1"), val = bool(true)]; tensor v_increment_25 = matmul(transpose_x = v_increment_25_transpose_x_1, transpose_y = v_increment_25_transpose_y_1, x = update_mask, y = var_6918)[name = string("v_increment_25")]; tensor var_6970_cast_fp16 = mul(x = K_cache_25_cast_fp16, y = var_1125_cast_fp16)[name = string("op_6970_cast_fp16")]; tensor K_new_25_cast_fp16 = add(x = var_6970_cast_fp16, y = k_increment_25)[name = string("K_new_25_cast_fp16")]; tensor var_6976_cast_fp16 = mul(x = V_cache_25_cast_fp16, y = var_1125_cast_fp16)[name = string("op_6976_cast_fp16")]; tensor V_new_25_cast_fp16 = add(x = var_6976_cast_fp16, y = v_increment_25)[name = string("V_new_25_cast_fp16")]; tensor var_6980_axes_0 = const()[name = string("op_6980_axes_0"), val = tensor([0])]; tensor var_6980_cast_fp16 = squeeze(axes = var_6980_axes_0, x = K_new_25_cast_fp16)[name = string("op_6980_cast_fp16")]; tensor concat_48 = const()[name = string("concat_48"), val = tensor([12, 0, 0, 0])]; tensor concat_49 = const()[name = string("concat_49"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_25_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_25_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_25_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_48, begin_mask = kv_cache_0_internal_tensor_assign_25_begin_mask_0, end = concat_49, end_mask = kv_cache_0_internal_tensor_assign_25_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_25_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_25_stride_0, update = var_6980_cast_fp16, x = coreml_update_state_59)[name = string("kv_cache_0_internal_tensor_assign_25_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_25_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_60_write_state")]; tensor coreml_update_state_60 = read_state(input = kv_cache_0)[name = string("coreml_update_state_60")]; tensor var_6987_axes_0 = const()[name = string("op_6987_axes_0"), val = tensor([0])]; tensor var_6987_cast_fp16 = squeeze(axes = var_6987_axes_0, x = V_new_25_cast_fp16)[name = string("op_6987_cast_fp16")]; tensor concat_50 = const()[name = string("concat_50"), val = tensor([30, 0, 0, 0])]; tensor concat_51 = const()[name = string("concat_51"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_26_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_26_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_26_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_50, begin_mask = kv_cache_0_internal_tensor_assign_26_begin_mask_0, end = concat_51, end_mask = kv_cache_0_internal_tensor_assign_26_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_26_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_26_stride_0, update = var_6987_cast_fp16, x = coreml_update_state_60)[name = string("kv_cache_0_internal_tensor_assign_26_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_26_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_61_write_state")]; tensor coreml_update_state_61 = read_state(input = kv_cache_0)[name = string("coreml_update_state_61")]; tensor hidden_states_97_axes_0 = const()[name = string("hidden_states_97_axes_0"), val = tensor([2])]; tensor hidden_states_97_cast_fp16 = expand_dims(axes = hidden_states_97_axes_0, x = K_new_25_cast_fp16)[name = string("hidden_states_97_cast_fp16")]; tensor var_7000 = const()[name = string("op_7000"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_99_cast_fp16 = tile(reps = var_7000, x = hidden_states_97_cast_fp16)[name = string("hidden_states_99_cast_fp16")]; tensor var_7006 = const()[name = string("op_7006"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_25_cast_fp16 = reshape(shape = var_7006, x = hidden_states_99_cast_fp16)[name = string("K_expanded_25_cast_fp16")]; tensor hidden_states_101_axes_0 = const()[name = string("hidden_states_101_axes_0"), val = tensor([2])]; tensor hidden_states_101_cast_fp16 = expand_dims(axes = hidden_states_101_axes_0, x = V_new_25_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor var_7015 = const()[name = string("op_7015"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_103_cast_fp16 = tile(reps = var_7015, x = hidden_states_101_cast_fp16)[name = string("hidden_states_103_cast_fp16")]; tensor var_7021 = const()[name = string("op_7021"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_25_cast_fp16 = reshape(shape = var_7021, x = hidden_states_103_cast_fp16)[name = string("V_expanded_25_cast_fp16")]; bool var_7036_transpose_x_1 = const()[name = string("op_7036_transpose_x_1"), val = bool(false)]; bool var_7036_transpose_y_1 = const()[name = string("op_7036_transpose_y_1"), val = bool(true)]; tensor var_7036_cast_fp16 = matmul(transpose_x = var_7036_transpose_x_1, transpose_y = var_7036_transpose_y_1, x = q_77, y = K_expanded_25_cast_fp16)[name = string("op_7036_cast_fp16")]; fp16 var_7037_to_fp16 = const()[name = string("op_7037_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_73_cast_fp16 = mul(x = var_7036_cast_fp16, y = var_7037_to_fp16)[name = string("attn_weights_73_cast_fp16")]; tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = causal_mask)[name = string("attn_weights_75_cast_fp16")]; int32 var_7046 = const()[name = string("op_7046"), val = int32(-1)]; tensor var_7048_cast_fp16 = softmax(axis = var_7046, x = attn_weights_75_cast_fp16)[name = string("op_7048_cast_fp16")]; bool var_7064_transpose_x_0 = const()[name = string("op_7064_transpose_x_0"), val = bool(false)]; bool var_7064_transpose_y_0 = const()[name = string("op_7064_transpose_y_0"), val = bool(false)]; tensor var_7064_cast_fp16 = matmul(transpose_x = var_7064_transpose_x_0, transpose_y = var_7064_transpose_y_0, x = var_7048_cast_fp16, y = V_expanded_25_cast_fp16)[name = string("op_7064_cast_fp16")]; tensor var_7074 = const()[name = string("op_7074"), val = tensor([0, 2, 1, 3])]; tensor var_7081 = const()[name = string("op_7081"), val = tensor([1, 32, 1024])]; tensor var_7075 = transpose(perm = var_7074, x = var_7064_cast_fp16)[name = string("transpose_46")]; tensor attn_output_75 = reshape(shape = var_7081, x = var_7075)[name = string("attn_output_75")]; tensor var_7086 = const()[name = string("op_7086"), val = tensor([0, 2, 1])]; tensor squeeze_12_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269199488))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269854912))))[name = string("squeeze_12_quantized")]; string var_7102_pad_type_0 = const()[name = string("op_7102_pad_type_0"), val = string("valid")]; int32 var_7102_groups_0 = const()[name = string("op_7102_groups_0"), val = int32(1)]; tensor var_7102_strides_0 = const()[name = string("op_7102_strides_0"), val = tensor([1])]; tensor var_7102_pad_0 = const()[name = string("op_7102_pad_0"), val = tensor([0, 0])]; tensor var_7102_dilations_0 = const()[name = string("op_7102_dilations_0"), val = tensor([1])]; tensor var_7087 = transpose(perm = var_7086, x = attn_output_75)[name = string("transpose_45")]; tensor var_7102 = conv(dilations = var_7102_dilations_0, groups = var_7102_groups_0, pad = var_7102_pad_0, pad_type = var_7102_pad_type_0, strides = var_7102_strides_0, weight = squeeze_12_quantized, x = var_7087)[name = string("op_7102")]; tensor var_7106 = const()[name = string("op_7106"), val = tensor([0, 2, 1])]; int32 var_7113 = const()[name = string("op_7113"), val = int32(-1)]; fp16 const_176_promoted_to_fp16 = const()[name = string("const_176_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_201 = transpose(perm = var_7106, x = var_7102)[name = string("transpose_44")]; tensor var_7119_cast_fp16 = mul(x = x_201, y = const_176_promoted_to_fp16)[name = string("op_7119_cast_fp16")]; bool input_251_interleave_0 = const()[name = string("input_251_interleave_0"), val = bool(false)]; tensor input_251_cast_fp16 = concat(axis = var_7113, interleave = input_251_interleave_0, values = (x_201, var_7119_cast_fp16))[name = string("input_251_cast_fp16")]; tensor normed_351_axes_0 = const()[name = string("normed_351_axes_0"), val = tensor([-1])]; fp16 var_7111_to_fp16 = const()[name = string("op_7111_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_351_cast_fp16 = layer_norm(axes = normed_351_axes_0, epsilon = var_7111_to_fp16, x = input_251_cast_fp16)[name = string("normed_351_cast_fp16")]; tensor var_7124_split_sizes_0 = const()[name = string("op_7124_split_sizes_0"), val = tensor([640, 640])]; int32 var_7124_axis_0 = const()[name = string("op_7124_axis_0"), val = int32(-1)]; tensor var_7124_cast_fp16_0, tensor var_7124_cast_fp16_1 = split(axis = var_7124_axis_0, split_sizes = var_7124_split_sizes_0, x = normed_351_cast_fp16)[name = string("op_7124_cast_fp16")]; tensor var_7128_to_fp16 = const()[name = string("op_7128_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269856256)))]; tensor out_151_cast_fp16 = mul(x = var_7124_cast_fp16_0, y = var_7128_to_fp16)[name = string("out_151_cast_fp16")]; tensor x_203_cast_fp16 = add(x = x_193_cast_fp16, y = out_151_cast_fp16)[name = string("x_203_cast_fp16")]; int32 var_7142 = const()[name = string("op_7142"), val = int32(-1)]; fp16 const_178_promoted_to_fp16 = const()[name = string("const_178_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7148_cast_fp16 = mul(x = x_203_cast_fp16, y = const_178_promoted_to_fp16)[name = string("op_7148_cast_fp16")]; bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; tensor input_253_cast_fp16 = concat(axis = var_7142, interleave = input_253_interleave_0, values = (x_203_cast_fp16, var_7148_cast_fp16))[name = string("input_253_cast_fp16")]; tensor normed_355_axes_0 = const()[name = string("normed_355_axes_0"), val = tensor([-1])]; fp16 var_7140_to_fp16 = const()[name = string("op_7140_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_355_cast_fp16 = layer_norm(axes = normed_355_axes_0, epsilon = var_7140_to_fp16, x = input_253_cast_fp16)[name = string("normed_355_cast_fp16")]; tensor var_7153_split_sizes_0 = const()[name = string("op_7153_split_sizes_0"), val = tensor([640, 640])]; int32 var_7153_axis_0 = const()[name = string("op_7153_axis_0"), val = int32(-1)]; tensor var_7153_cast_fp16_0, tensor var_7153_cast_fp16_1 = split(axis = var_7153_axis_0, split_sizes = var_7153_split_sizes_0, x = normed_355_cast_fp16)[name = string("op_7153_cast_fp16")]; tensor var_7157_to_fp16 = const()[name = string("op_7157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269857600)))]; tensor out_153_cast_fp16 = mul(x = var_7153_cast_fp16_0, y = var_7157_to_fp16)[name = string("out_153_cast_fp16")]; tensor var_7171 = const()[name = string("op_7171"), val = tensor([0, 2, 1])]; tensor input_255_axes_0 = const()[name = string("input_255_axes_0"), val = tensor([2])]; tensor var_7172 = transpose(perm = var_7171, x = out_153_cast_fp16)[name = string("transpose_43")]; tensor input_255 = expand_dims(axes = input_255_axes_0, x = var_7172)[name = string("input_255")]; string gate_49_pad_type_0 = const()[name = string("gate_49_pad_type_0"), val = string("valid")]; tensor gate_49_strides_0 = const()[name = string("gate_49_strides_0"), val = tensor([1, 1])]; tensor gate_49_pad_0 = const()[name = string("gate_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_49_dilations_0 = const()[name = string("gate_49_dilations_0"), val = tensor([1, 1])]; int32 gate_49_groups_0 = const()[name = string("gate_49_groups_0"), val = int32(1)]; tensor gate_49 = conv(dilations = gate_49_dilations_0, groups = gate_49_groups_0, pad = gate_49_pad_0, pad_type = gate_49_pad_type_0, strides = gate_49_strides_0, weight = layers_12_mlp_gate_proj_weight_quantized, x = input_255)[name = string("gate_49")]; string up_25_pad_type_0 = const()[name = string("up_25_pad_type_0"), val = string("valid")]; tensor up_25_strides_0 = const()[name = string("up_25_strides_0"), val = tensor([1, 1])]; tensor up_25_pad_0 = const()[name = string("up_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_25_dilations_0 = const()[name = string("up_25_dilations_0"), val = tensor([1, 1])]; int32 up_25_groups_0 = const()[name = string("up_25_groups_0"), val = int32(1)]; tensor up_25 = conv(dilations = up_25_dilations_0, groups = up_25_groups_0, pad = up_25_pad_0, pad_type = up_25_pad_type_0, strides = up_25_strides_0, weight = layers_12_mlp_up_proj_weight_quantized, x = input_255)[name = string("up_25")]; string gate_51_mode_0 = const()[name = string("gate_51_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_51 = gelu(mode = gate_51_mode_0, x = gate_49)[name = string("gate_51")]; tensor input_257 = mul(x = gate_51, y = up_25)[name = string("input_257")]; string var_7210_pad_type_0 = const()[name = string("op_7210_pad_type_0"), val = string("valid")]; tensor var_7210_strides_0 = const()[name = string("op_7210_strides_0"), val = tensor([1, 1])]; tensor var_7210_pad_0 = const()[name = string("op_7210_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7210_dilations_0 = const()[name = string("op_7210_dilations_0"), val = tensor([1, 1])]; int32 var_7210_groups_0 = const()[name = string("op_7210_groups_0"), val = int32(1)]; tensor var_7210 = conv(dilations = var_7210_dilations_0, groups = var_7210_groups_0, pad = var_7210_pad_0, pad_type = var_7210_pad_type_0, strides = var_7210_strides_0, weight = layers_12_mlp_down_proj_weight_quantized, x = input_257)[name = string("op_7210")]; tensor var_7212_axes_0 = const()[name = string("op_7212_axes_0"), val = tensor([2])]; tensor var_7212 = squeeze(axes = var_7212_axes_0, x = var_7210)[name = string("op_7212")]; tensor var_7216 = const()[name = string("op_7216"), val = tensor([0, 2, 1])]; int32 var_7223 = const()[name = string("op_7223"), val = int32(-1)]; fp16 const_180_promoted_to_fp16 = const()[name = string("const_180_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_207 = transpose(perm = var_7216, x = var_7212)[name = string("transpose_42")]; tensor var_7229_cast_fp16 = mul(x = x_207, y = const_180_promoted_to_fp16)[name = string("op_7229_cast_fp16")]; bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; tensor input_259_cast_fp16 = concat(axis = var_7223, interleave = input_259_interleave_0, values = (x_207, var_7229_cast_fp16))[name = string("input_259_cast_fp16")]; tensor normed_361_axes_0 = const()[name = string("normed_361_axes_0"), val = tensor([-1])]; fp16 var_7221_to_fp16 = const()[name = string("op_7221_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_361_cast_fp16 = layer_norm(axes = normed_361_axes_0, epsilon = var_7221_to_fp16, x = input_259_cast_fp16)[name = string("normed_361_cast_fp16")]; tensor var_7234_split_sizes_0 = const()[name = string("op_7234_split_sizes_0"), val = tensor([640, 640])]; int32 var_7234_axis_0 = const()[name = string("op_7234_axis_0"), val = int32(-1)]; tensor var_7234_cast_fp16_0, tensor var_7234_cast_fp16_1 = split(axis = var_7234_axis_0, split_sizes = var_7234_split_sizes_0, x = normed_361_cast_fp16)[name = string("op_7234_cast_fp16")]; tensor var_7238_to_fp16 = const()[name = string("op_7238_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269858944)))]; tensor out_155_cast_fp16 = mul(x = var_7234_cast_fp16_0, y = var_7238_to_fp16)[name = string("out_155_cast_fp16")]; tensor x_209_cast_fp16 = add(x = x_203_cast_fp16, y = out_155_cast_fp16)[name = string("x_209_cast_fp16")]; int32 var_7252 = const()[name = string("op_7252"), val = int32(-1)]; fp16 const_182_promoted_to_fp16 = const()[name = string("const_182_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7258_cast_fp16 = mul(x = x_209_cast_fp16, y = const_182_promoted_to_fp16)[name = string("op_7258_cast_fp16")]; bool input_261_interleave_0 = const()[name = string("input_261_interleave_0"), val = bool(false)]; tensor input_261_cast_fp16 = concat(axis = var_7252, interleave = input_261_interleave_0, values = (x_209_cast_fp16, var_7258_cast_fp16))[name = string("input_261_cast_fp16")]; tensor normed_365_axes_0 = const()[name = string("normed_365_axes_0"), val = tensor([-1])]; fp16 var_7250_to_fp16 = const()[name = string("op_7250_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_365_cast_fp16 = layer_norm(axes = normed_365_axes_0, epsilon = var_7250_to_fp16, x = input_261_cast_fp16)[name = string("normed_365_cast_fp16")]; tensor var_7263_split_sizes_0 = const()[name = string("op_7263_split_sizes_0"), val = tensor([640, 640])]; int32 var_7263_axis_0 = const()[name = string("op_7263_axis_0"), val = int32(-1)]; tensor var_7263_cast_fp16_0, tensor var_7263_cast_fp16_1 = split(axis = var_7263_axis_0, split_sizes = var_7263_split_sizes_0, x = normed_365_cast_fp16)[name = string("op_7263_cast_fp16")]; tensor var_7267_to_fp16 = const()[name = string("op_7267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269860288)))]; tensor out_157_cast_fp16 = mul(x = var_7263_cast_fp16_0, y = var_7267_to_fp16)[name = string("out_157_cast_fp16")]; tensor var_7281 = const()[name = string("op_7281"), val = tensor([0, 2, 1])]; tensor input_263_axes_0 = const()[name = string("input_263_axes_0"), val = tensor([2])]; tensor var_7282 = transpose(perm = var_7281, x = out_157_cast_fp16)[name = string("transpose_41")]; tensor input_263 = expand_dims(axes = input_263_axes_0, x = var_7282)[name = string("input_263")]; string var_7295_pad_type_0 = const()[name = string("op_7295_pad_type_0"), val = string("valid")]; tensor var_7295_strides_0 = const()[name = string("op_7295_strides_0"), val = tensor([1, 1])]; tensor var_7295_pad_0 = const()[name = string("op_7295_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7295_dilations_0 = const()[name = string("op_7295_dilations_0"), val = tensor([1, 1])]; int32 var_7295_groups_0 = const()[name = string("op_7295_groups_0"), val = int32(1)]; tensor var_7295 = conv(dilations = var_7295_dilations_0, groups = var_7295_groups_0, pad = var_7295_pad_0, pad_type = var_7295_pad_type_0, strides = var_7295_strides_0, weight = layers_13_self_attn_q_proj_weight_quantized, x = input_263)[name = string("op_7295")]; tensor var_7300 = const()[name = string("op_7300"), val = tensor([1, 4, 256, 32])]; tensor var_7301 = reshape(shape = var_7300, x = var_7295)[name = string("op_7301")]; tensor var_7306 = const()[name = string("op_7306"), val = tensor([0, 1, 3, 2])]; int32 var_7319 = const()[name = string("op_7319"), val = int32(-1)]; fp16 const_184_promoted_to_fp16 = const()[name = string("const_184_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_79 = transpose(perm = var_7306, x = var_7301)[name = string("transpose_40")]; tensor var_7325_cast_fp16 = mul(x = q_79, y = const_184_promoted_to_fp16)[name = string("op_7325_cast_fp16")]; bool input_265_interleave_0 = const()[name = string("input_265_interleave_0"), val = bool(false)]; tensor input_265_cast_fp16 = concat(axis = var_7319, interleave = input_265_interleave_0, values = (q_79, var_7325_cast_fp16))[name = string("input_265_cast_fp16")]; tensor normed_371_axes_0 = const()[name = string("normed_371_axes_0"), val = tensor([-1])]; fp16 var_7317_to_fp16 = const()[name = string("op_7317_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_371_cast_fp16 = layer_norm(axes = normed_371_axes_0, epsilon = var_7317_to_fp16, x = input_265_cast_fp16)[name = string("normed_371_cast_fp16")]; tensor var_7330_split_sizes_0 = const()[name = string("op_7330_split_sizes_0"), val = tensor([256, 256])]; int32 var_7330_axis_0 = const()[name = string("op_7330_axis_0"), val = int32(-1)]; tensor var_7330_cast_fp16_0, tensor var_7330_cast_fp16_1 = split(axis = var_7330_axis_0, split_sizes = var_7330_split_sizes_0, x = normed_371_cast_fp16)[name = string("op_7330_cast_fp16")]; tensor var_7334_to_fp16 = const()[name = string("op_7334_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269861632)))]; tensor out_159_cast_fp16 = mul(x = var_7330_cast_fp16_0, y = var_7334_to_fp16)[name = string("out_159_cast_fp16")]; string var_7347_pad_type_0 = const()[name = string("op_7347_pad_type_0"), val = string("valid")]; tensor var_7347_strides_0 = const()[name = string("op_7347_strides_0"), val = tensor([1, 1])]; tensor var_7347_pad_0 = const()[name = string("op_7347_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7347_dilations_0 = const()[name = string("op_7347_dilations_0"), val = tensor([1, 1])]; int32 var_7347_groups_0 = const()[name = string("op_7347_groups_0"), val = int32(1)]; tensor var_7347 = conv(dilations = var_7347_dilations_0, groups = var_7347_groups_0, pad = var_7347_pad_0, pad_type = var_7347_pad_type_0, strides = var_7347_strides_0, weight = layers_13_self_attn_k_proj_weight_quantized, x = input_263)[name = string("op_7347")]; tensor var_7352 = const()[name = string("op_7352"), val = tensor([1, 1, 256, 32])]; tensor var_7353 = reshape(shape = var_7352, x = var_7347)[name = string("op_7353")]; tensor var_7358 = const()[name = string("op_7358"), val = tensor([0, 1, 3, 2])]; int32 var_7371 = const()[name = string("op_7371"), val = int32(-1)]; fp16 const_186_promoted_to_fp16 = const()[name = string("const_186_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_79 = transpose(perm = var_7358, x = var_7353)[name = string("transpose_39")]; tensor var_7377_cast_fp16 = mul(x = k_79, y = const_186_promoted_to_fp16)[name = string("op_7377_cast_fp16")]; bool input_267_interleave_0 = const()[name = string("input_267_interleave_0"), val = bool(false)]; tensor input_267_cast_fp16 = concat(axis = var_7371, interleave = input_267_interleave_0, values = (k_79, var_7377_cast_fp16))[name = string("input_267_cast_fp16")]; tensor normed_375_axes_0 = const()[name = string("normed_375_axes_0"), val = tensor([-1])]; fp16 var_7369_to_fp16 = const()[name = string("op_7369_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_375_cast_fp16 = layer_norm(axes = normed_375_axes_0, epsilon = var_7369_to_fp16, x = input_267_cast_fp16)[name = string("normed_375_cast_fp16")]; tensor var_7382_split_sizes_0 = const()[name = string("op_7382_split_sizes_0"), val = tensor([256, 256])]; int32 var_7382_axis_0 = const()[name = string("op_7382_axis_0"), val = int32(-1)]; tensor var_7382_cast_fp16_0, tensor var_7382_cast_fp16_1 = split(axis = var_7382_axis_0, split_sizes = var_7382_split_sizes_0, x = normed_375_cast_fp16)[name = string("op_7382_cast_fp16")]; tensor var_7386_to_fp16 = const()[name = string("op_7386_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269862208)))]; tensor out_161_cast_fp16 = mul(x = var_7382_cast_fp16_0, y = var_7386_to_fp16)[name = string("out_161_cast_fp16")]; string var_7399_pad_type_0 = const()[name = string("op_7399_pad_type_0"), val = string("valid")]; tensor var_7399_strides_0 = const()[name = string("op_7399_strides_0"), val = tensor([1, 1])]; tensor var_7399_pad_0 = const()[name = string("op_7399_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7399_dilations_0 = const()[name = string("op_7399_dilations_0"), val = tensor([1, 1])]; int32 var_7399_groups_0 = const()[name = string("op_7399_groups_0"), val = int32(1)]; tensor var_7399 = conv(dilations = var_7399_dilations_0, groups = var_7399_groups_0, pad = var_7399_pad_0, pad_type = var_7399_pad_type_0, strides = var_7399_strides_0, weight = layers_13_self_attn_v_proj_weight_quantized, x = input_263)[name = string("op_7399")]; tensor var_7404 = const()[name = string("op_7404"), val = tensor([1, 1, 256, 32])]; tensor var_7405 = reshape(shape = var_7404, x = var_7399)[name = string("op_7405")]; tensor var_7412 = mul(x = out_159_cast_fp16, y = cos_1)[name = string("op_7412")]; tensor var_7413_split_sizes_0 = const()[name = string("op_7413_split_sizes_0"), val = tensor([128, 128])]; int32 var_7413_axis_0 = const()[name = string("op_7413_axis_0"), val = int32(-1)]; tensor var_7413_0, tensor var_7413_1 = split(axis = var_7413_axis_0, split_sizes = var_7413_split_sizes_0, x = out_159_cast_fp16)[name = string("op_7413")]; fp16 const_188_promoted = const()[name = string("const_188_promoted"), val = fp16(-0x1p+0)]; tensor var_7415 = mul(x = var_7413_1, y = const_188_promoted)[name = string("op_7415")]; int32 var_7417 = const()[name = string("op_7417"), val = int32(-1)]; bool var_7418_interleave_0 = const()[name = string("op_7418_interleave_0"), val = bool(false)]; tensor var_7418 = concat(axis = var_7417, interleave = var_7418_interleave_0, values = (var_7415, var_7413_0))[name = string("op_7418")]; tensor var_7419 = mul(x = var_7418, y = sin_1)[name = string("op_7419")]; tensor q_83 = add(x = var_7412, y = var_7419)[name = string("q_83")]; tensor var_7422 = mul(x = out_161_cast_fp16, y = cos_1)[name = string("op_7422")]; tensor var_7423_split_sizes_0 = const()[name = string("op_7423_split_sizes_0"), val = tensor([128, 128])]; int32 var_7423_axis_0 = const()[name = string("op_7423_axis_0"), val = int32(-1)]; tensor var_7423_0, tensor var_7423_1 = split(axis = var_7423_axis_0, split_sizes = var_7423_split_sizes_0, x = out_161_cast_fp16)[name = string("op_7423")]; fp16 const_189_promoted = const()[name = string("const_189_promoted"), val = fp16(-0x1p+0)]; tensor var_7425 = mul(x = var_7423_1, y = const_189_promoted)[name = string("op_7425")]; int32 var_7427 = const()[name = string("op_7427"), val = int32(-1)]; bool var_7428_interleave_0 = const()[name = string("op_7428_interleave_0"), val = bool(false)]; tensor var_7428 = concat(axis = var_7427, interleave = var_7428_interleave_0, values = (var_7425, var_7423_0))[name = string("op_7428")]; tensor var_7429 = mul(x = var_7428, y = sin_1)[name = string("op_7429")]; tensor k_83 = add(x = var_7422, y = var_7429)[name = string("k_83")]; tensor var_7434_begin_0 = const()[name = string("op_7434_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_7434_end_0 = const()[name = string("op_7434_end_0"), val = tensor([14, 1, 2048, 256])]; tensor var_7434_end_mask_0 = const()[name = string("op_7434_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7434_squeeze_mask_0 = const()[name = string("op_7434_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_7434_cast_fp16 = slice_by_index(begin = var_7434_begin_0, end = var_7434_end_0, end_mask = var_7434_end_mask_0, squeeze_mask = var_7434_squeeze_mask_0, x = coreml_update_state_61)[name = string("op_7434_cast_fp16")]; tensor K_cache_27_axes_0 = const()[name = string("K_cache_27_axes_0"), val = tensor([0])]; tensor K_cache_27_cast_fp16 = expand_dims(axes = K_cache_27_axes_0, x = var_7434_cast_fp16)[name = string("K_cache_27_cast_fp16")]; tensor var_7439_begin_0 = const()[name = string("op_7439_begin_0"), val = tensor([31, 0, 0, 0])]; tensor var_7439_end_0 = const()[name = string("op_7439_end_0"), val = tensor([32, 1, 2048, 256])]; tensor var_7439_end_mask_0 = const()[name = string("op_7439_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7439_squeeze_mask_0 = const()[name = string("op_7439_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_7439_cast_fp16 = slice_by_index(begin = var_7439_begin_0, end = var_7439_end_0, end_mask = var_7439_end_mask_0, squeeze_mask = var_7439_squeeze_mask_0, x = coreml_update_state_61)[name = string("op_7439_cast_fp16")]; tensor V_cache_27_axes_0 = const()[name = string("V_cache_27_axes_0"), val = tensor([0])]; tensor V_cache_27_cast_fp16 = expand_dims(axes = V_cache_27_axes_0, x = var_7439_cast_fp16)[name = string("V_cache_27_cast_fp16")]; bool k_increment_27_transpose_x_0 = const()[name = string("k_increment_27_transpose_x_0"), val = bool(false)]; bool k_increment_27_transpose_y_0 = const()[name = string("k_increment_27_transpose_y_0"), val = bool(false)]; tensor k_increment_27 = matmul(transpose_x = k_increment_27_transpose_x_0, transpose_y = k_increment_27_transpose_y_0, x = update_mask, y = k_83)[name = string("k_increment_27")]; bool v_increment_27_transpose_x_1 = const()[name = string("v_increment_27_transpose_x_1"), val = bool(false)]; bool v_increment_27_transpose_y_1 = const()[name = string("v_increment_27_transpose_y_1"), val = bool(true)]; tensor v_increment_27 = matmul(transpose_x = v_increment_27_transpose_x_1, transpose_y = v_increment_27_transpose_y_1, x = update_mask, y = var_7405)[name = string("v_increment_27")]; tensor var_7457_cast_fp16 = mul(x = K_cache_27_cast_fp16, y = var_1125_cast_fp16)[name = string("op_7457_cast_fp16")]; tensor K_new_27_cast_fp16 = add(x = var_7457_cast_fp16, y = k_increment_27)[name = string("K_new_27_cast_fp16")]; tensor var_7463_cast_fp16 = mul(x = V_cache_27_cast_fp16, y = var_1125_cast_fp16)[name = string("op_7463_cast_fp16")]; tensor V_new_27_cast_fp16 = add(x = var_7463_cast_fp16, y = v_increment_27)[name = string("V_new_27_cast_fp16")]; tensor var_7467_axes_0 = const()[name = string("op_7467_axes_0"), val = tensor([0])]; tensor var_7467_cast_fp16 = squeeze(axes = var_7467_axes_0, x = K_new_27_cast_fp16)[name = string("op_7467_cast_fp16")]; tensor concat_52 = const()[name = string("concat_52"), val = tensor([13, 0, 0, 0])]; tensor concat_53 = const()[name = string("concat_53"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_27_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_27_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_27_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_52, begin_mask = kv_cache_0_internal_tensor_assign_27_begin_mask_0, end = concat_53, end_mask = kv_cache_0_internal_tensor_assign_27_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_27_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_27_stride_0, update = var_7467_cast_fp16, x = coreml_update_state_61)[name = string("kv_cache_0_internal_tensor_assign_27_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_27_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_62_write_state")]; tensor coreml_update_state_62 = read_state(input = kv_cache_0)[name = string("coreml_update_state_62")]; tensor var_7474_axes_0 = const()[name = string("op_7474_axes_0"), val = tensor([0])]; tensor var_7474_cast_fp16 = squeeze(axes = var_7474_axes_0, x = V_new_27_cast_fp16)[name = string("op_7474_cast_fp16")]; tensor concat_54 = const()[name = string("concat_54"), val = tensor([31, 0, 0, 0])]; tensor concat_55 = const()[name = string("concat_55"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_28_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_28_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_28_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_54, begin_mask = kv_cache_0_internal_tensor_assign_28_begin_mask_0, end = concat_55, end_mask = kv_cache_0_internal_tensor_assign_28_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_28_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_28_stride_0, update = var_7474_cast_fp16, x = coreml_update_state_62)[name = string("kv_cache_0_internal_tensor_assign_28_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_28_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_63_write_state")]; tensor coreml_update_state_63 = read_state(input = kv_cache_0)[name = string("coreml_update_state_63")]; tensor hidden_states_105_axes_0 = const()[name = string("hidden_states_105_axes_0"), val = tensor([2])]; tensor hidden_states_105_cast_fp16 = expand_dims(axes = hidden_states_105_axes_0, x = K_new_27_cast_fp16)[name = string("hidden_states_105_cast_fp16")]; tensor var_7487 = const()[name = string("op_7487"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_107_cast_fp16 = tile(reps = var_7487, x = hidden_states_105_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; tensor var_7493 = const()[name = string("op_7493"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_27_cast_fp16 = reshape(shape = var_7493, x = hidden_states_107_cast_fp16)[name = string("K_expanded_27_cast_fp16")]; tensor hidden_states_109_axes_0 = const()[name = string("hidden_states_109_axes_0"), val = tensor([2])]; tensor hidden_states_109_cast_fp16 = expand_dims(axes = hidden_states_109_axes_0, x = V_new_27_cast_fp16)[name = string("hidden_states_109_cast_fp16")]; tensor var_7502 = const()[name = string("op_7502"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_111_cast_fp16 = tile(reps = var_7502, x = hidden_states_109_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; tensor var_7508 = const()[name = string("op_7508"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_27_cast_fp16 = reshape(shape = var_7508, x = hidden_states_111_cast_fp16)[name = string("V_expanded_27_cast_fp16")]; bool var_7523_transpose_x_1 = const()[name = string("op_7523_transpose_x_1"), val = bool(false)]; bool var_7523_transpose_y_1 = const()[name = string("op_7523_transpose_y_1"), val = bool(true)]; tensor var_7523_cast_fp16 = matmul(transpose_x = var_7523_transpose_x_1, transpose_y = var_7523_transpose_y_1, x = q_83, y = K_expanded_27_cast_fp16)[name = string("op_7523_cast_fp16")]; fp16 var_7524_to_fp16 = const()[name = string("op_7524_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_79_cast_fp16 = mul(x = var_7523_cast_fp16, y = var_7524_to_fp16)[name = string("attn_weights_79_cast_fp16")]; tensor attn_weights_81_cast_fp16 = add(x = attn_weights_79_cast_fp16, y = causal_mask)[name = string("attn_weights_81_cast_fp16")]; int32 var_7533 = const()[name = string("op_7533"), val = int32(-1)]; tensor var_7535_cast_fp16 = softmax(axis = var_7533, x = attn_weights_81_cast_fp16)[name = string("op_7535_cast_fp16")]; bool var_7551_transpose_x_0 = const()[name = string("op_7551_transpose_x_0"), val = bool(false)]; bool var_7551_transpose_y_0 = const()[name = string("op_7551_transpose_y_0"), val = bool(false)]; tensor var_7551_cast_fp16 = matmul(transpose_x = var_7551_transpose_x_0, transpose_y = var_7551_transpose_y_0, x = var_7535_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_7551_cast_fp16")]; tensor var_7561 = const()[name = string("op_7561"), val = tensor([0, 2, 1, 3])]; tensor var_7568 = const()[name = string("op_7568"), val = tensor([1, 32, 1024])]; tensor var_7562 = transpose(perm = var_7561, x = var_7551_cast_fp16)[name = string("transpose_38")]; tensor attn_output_81 = reshape(shape = var_7568, x = var_7562)[name = string("attn_output_81")]; tensor var_7573 = const()[name = string("op_7573"), val = tensor([0, 2, 1])]; tensor squeeze_13_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269862784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270518208))))[name = string("squeeze_13_quantized")]; string var_7589_pad_type_0 = const()[name = string("op_7589_pad_type_0"), val = string("valid")]; int32 var_7589_groups_0 = const()[name = string("op_7589_groups_0"), val = int32(1)]; tensor var_7589_strides_0 = const()[name = string("op_7589_strides_0"), val = tensor([1])]; tensor var_7589_pad_0 = const()[name = string("op_7589_pad_0"), val = tensor([0, 0])]; tensor var_7589_dilations_0 = const()[name = string("op_7589_dilations_0"), val = tensor([1])]; tensor var_7574 = transpose(perm = var_7573, x = attn_output_81)[name = string("transpose_37")]; tensor var_7589 = conv(dilations = var_7589_dilations_0, groups = var_7589_groups_0, pad = var_7589_pad_0, pad_type = var_7589_pad_type_0, strides = var_7589_strides_0, weight = squeeze_13_quantized, x = var_7574)[name = string("op_7589")]; tensor var_7593 = const()[name = string("op_7593"), val = tensor([0, 2, 1])]; int32 var_7600 = const()[name = string("op_7600"), val = int32(-1)]; fp16 const_190_promoted_to_fp16 = const()[name = string("const_190_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_217 = transpose(perm = var_7593, x = var_7589)[name = string("transpose_36")]; tensor var_7606_cast_fp16 = mul(x = x_217, y = const_190_promoted_to_fp16)[name = string("op_7606_cast_fp16")]; bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; tensor input_271_cast_fp16 = concat(axis = var_7600, interleave = input_271_interleave_0, values = (x_217, var_7606_cast_fp16))[name = string("input_271_cast_fp16")]; tensor normed_379_axes_0 = const()[name = string("normed_379_axes_0"), val = tensor([-1])]; fp16 var_7598_to_fp16 = const()[name = string("op_7598_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_379_cast_fp16 = layer_norm(axes = normed_379_axes_0, epsilon = var_7598_to_fp16, x = input_271_cast_fp16)[name = string("normed_379_cast_fp16")]; tensor var_7611_split_sizes_0 = const()[name = string("op_7611_split_sizes_0"), val = tensor([640, 640])]; int32 var_7611_axis_0 = const()[name = string("op_7611_axis_0"), val = int32(-1)]; tensor var_7611_cast_fp16_0, tensor var_7611_cast_fp16_1 = split(axis = var_7611_axis_0, split_sizes = var_7611_split_sizes_0, x = normed_379_cast_fp16)[name = string("op_7611_cast_fp16")]; tensor var_7615_to_fp16 = const()[name = string("op_7615_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270519552)))]; tensor out_163_cast_fp16 = mul(x = var_7611_cast_fp16_0, y = var_7615_to_fp16)[name = string("out_163_cast_fp16")]; tensor x_219_cast_fp16 = add(x = x_209_cast_fp16, y = out_163_cast_fp16)[name = string("x_219_cast_fp16")]; int32 var_7629 = const()[name = string("op_7629"), val = int32(-1)]; fp16 const_192_promoted_to_fp16 = const()[name = string("const_192_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7635_cast_fp16 = mul(x = x_219_cast_fp16, y = const_192_promoted_to_fp16)[name = string("op_7635_cast_fp16")]; bool input_273_interleave_0 = const()[name = string("input_273_interleave_0"), val = bool(false)]; tensor input_273_cast_fp16 = concat(axis = var_7629, interleave = input_273_interleave_0, values = (x_219_cast_fp16, var_7635_cast_fp16))[name = string("input_273_cast_fp16")]; tensor normed_383_axes_0 = const()[name = string("normed_383_axes_0"), val = tensor([-1])]; fp16 var_7627_to_fp16 = const()[name = string("op_7627_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_383_cast_fp16 = layer_norm(axes = normed_383_axes_0, epsilon = var_7627_to_fp16, x = input_273_cast_fp16)[name = string("normed_383_cast_fp16")]; tensor var_7640_split_sizes_0 = const()[name = string("op_7640_split_sizes_0"), val = tensor([640, 640])]; int32 var_7640_axis_0 = const()[name = string("op_7640_axis_0"), val = int32(-1)]; tensor var_7640_cast_fp16_0, tensor var_7640_cast_fp16_1 = split(axis = var_7640_axis_0, split_sizes = var_7640_split_sizes_0, x = normed_383_cast_fp16)[name = string("op_7640_cast_fp16")]; tensor var_7644_to_fp16 = const()[name = string("op_7644_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270520896)))]; tensor out_165_cast_fp16 = mul(x = var_7640_cast_fp16_0, y = var_7644_to_fp16)[name = string("out_165_cast_fp16")]; tensor var_7658 = const()[name = string("op_7658"), val = tensor([0, 2, 1])]; tensor input_275_axes_0 = const()[name = string("input_275_axes_0"), val = tensor([2])]; tensor var_7659 = transpose(perm = var_7658, x = out_165_cast_fp16)[name = string("transpose_35")]; tensor input_275 = expand_dims(axes = input_275_axes_0, x = var_7659)[name = string("input_275")]; string gate_53_pad_type_0 = const()[name = string("gate_53_pad_type_0"), val = string("valid")]; tensor gate_53_strides_0 = const()[name = string("gate_53_strides_0"), val = tensor([1, 1])]; tensor gate_53_pad_0 = const()[name = string("gate_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_53_dilations_0 = const()[name = string("gate_53_dilations_0"), val = tensor([1, 1])]; int32 gate_53_groups_0 = const()[name = string("gate_53_groups_0"), val = int32(1)]; tensor gate_53 = conv(dilations = gate_53_dilations_0, groups = gate_53_groups_0, pad = gate_53_pad_0, pad_type = gate_53_pad_type_0, strides = gate_53_strides_0, weight = layers_13_mlp_gate_proj_weight_quantized, x = input_275)[name = string("gate_53")]; string up_27_pad_type_0 = const()[name = string("up_27_pad_type_0"), val = string("valid")]; tensor up_27_strides_0 = const()[name = string("up_27_strides_0"), val = tensor([1, 1])]; tensor up_27_pad_0 = const()[name = string("up_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_27_dilations_0 = const()[name = string("up_27_dilations_0"), val = tensor([1, 1])]; int32 up_27_groups_0 = const()[name = string("up_27_groups_0"), val = int32(1)]; tensor up_27 = conv(dilations = up_27_dilations_0, groups = up_27_groups_0, pad = up_27_pad_0, pad_type = up_27_pad_type_0, strides = up_27_strides_0, weight = layers_13_mlp_up_proj_weight_quantized, x = input_275)[name = string("up_27")]; string gate_55_mode_0 = const()[name = string("gate_55_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_55 = gelu(mode = gate_55_mode_0, x = gate_53)[name = string("gate_55")]; tensor input_277 = mul(x = gate_55, y = up_27)[name = string("input_277")]; string var_7697_pad_type_0 = const()[name = string("op_7697_pad_type_0"), val = string("valid")]; tensor var_7697_strides_0 = const()[name = string("op_7697_strides_0"), val = tensor([1, 1])]; tensor var_7697_pad_0 = const()[name = string("op_7697_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7697_dilations_0 = const()[name = string("op_7697_dilations_0"), val = tensor([1, 1])]; int32 var_7697_groups_0 = const()[name = string("op_7697_groups_0"), val = int32(1)]; tensor var_7697 = conv(dilations = var_7697_dilations_0, groups = var_7697_groups_0, pad = var_7697_pad_0, pad_type = var_7697_pad_type_0, strides = var_7697_strides_0, weight = layers_13_mlp_down_proj_weight_quantized, x = input_277)[name = string("op_7697")]; tensor var_7699_axes_0 = const()[name = string("op_7699_axes_0"), val = tensor([2])]; tensor var_7699 = squeeze(axes = var_7699_axes_0, x = var_7697)[name = string("op_7699")]; tensor var_7703 = const()[name = string("op_7703"), val = tensor([0, 2, 1])]; int32 var_7710 = const()[name = string("op_7710"), val = int32(-1)]; fp16 const_194_promoted_to_fp16 = const()[name = string("const_194_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_223 = transpose(perm = var_7703, x = var_7699)[name = string("transpose_34")]; tensor var_7716_cast_fp16 = mul(x = x_223, y = const_194_promoted_to_fp16)[name = string("op_7716_cast_fp16")]; bool input_279_interleave_0 = const()[name = string("input_279_interleave_0"), val = bool(false)]; tensor input_279_cast_fp16 = concat(axis = var_7710, interleave = input_279_interleave_0, values = (x_223, var_7716_cast_fp16))[name = string("input_279_cast_fp16")]; tensor normed_389_axes_0 = const()[name = string("normed_389_axes_0"), val = tensor([-1])]; fp16 var_7708_to_fp16 = const()[name = string("op_7708_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_389_cast_fp16 = layer_norm(axes = normed_389_axes_0, epsilon = var_7708_to_fp16, x = input_279_cast_fp16)[name = string("normed_389_cast_fp16")]; tensor var_7721_split_sizes_0 = const()[name = string("op_7721_split_sizes_0"), val = tensor([640, 640])]; int32 var_7721_axis_0 = const()[name = string("op_7721_axis_0"), val = int32(-1)]; tensor var_7721_cast_fp16_0, tensor var_7721_cast_fp16_1 = split(axis = var_7721_axis_0, split_sizes = var_7721_split_sizes_0, x = normed_389_cast_fp16)[name = string("op_7721_cast_fp16")]; tensor var_7725_to_fp16 = const()[name = string("op_7725_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270522240)))]; tensor out_167_cast_fp16 = mul(x = var_7721_cast_fp16_0, y = var_7725_to_fp16)[name = string("out_167_cast_fp16")]; tensor x_225_cast_fp16 = add(x = x_219_cast_fp16, y = out_167_cast_fp16)[name = string("x_225_cast_fp16")]; int32 var_7739 = const()[name = string("op_7739"), val = int32(-1)]; fp16 const_196_promoted_to_fp16 = const()[name = string("const_196_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7745_cast_fp16 = mul(x = x_225_cast_fp16, y = const_196_promoted_to_fp16)[name = string("op_7745_cast_fp16")]; bool input_281_interleave_0 = const()[name = string("input_281_interleave_0"), val = bool(false)]; tensor input_281_cast_fp16 = concat(axis = var_7739, interleave = input_281_interleave_0, values = (x_225_cast_fp16, var_7745_cast_fp16))[name = string("input_281_cast_fp16")]; tensor normed_393_axes_0 = const()[name = string("normed_393_axes_0"), val = tensor([-1])]; fp16 var_7737_to_fp16 = const()[name = string("op_7737_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_393_cast_fp16 = layer_norm(axes = normed_393_axes_0, epsilon = var_7737_to_fp16, x = input_281_cast_fp16)[name = string("normed_393_cast_fp16")]; tensor var_7750_split_sizes_0 = const()[name = string("op_7750_split_sizes_0"), val = tensor([640, 640])]; int32 var_7750_axis_0 = const()[name = string("op_7750_axis_0"), val = int32(-1)]; tensor var_7750_cast_fp16_0, tensor var_7750_cast_fp16_1 = split(axis = var_7750_axis_0, split_sizes = var_7750_split_sizes_0, x = normed_393_cast_fp16)[name = string("op_7750_cast_fp16")]; tensor var_7754_to_fp16 = const()[name = string("op_7754_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270523584)))]; tensor out_169_cast_fp16 = mul(x = var_7750_cast_fp16_0, y = var_7754_to_fp16)[name = string("out_169_cast_fp16")]; tensor var_7768 = const()[name = string("op_7768"), val = tensor([0, 2, 1])]; tensor input_283_axes_0 = const()[name = string("input_283_axes_0"), val = tensor([2])]; tensor var_7769 = transpose(perm = var_7768, x = out_169_cast_fp16)[name = string("transpose_33")]; tensor input_283 = expand_dims(axes = input_283_axes_0, x = var_7769)[name = string("input_283")]; string var_7782_pad_type_0 = const()[name = string("op_7782_pad_type_0"), val = string("valid")]; tensor var_7782_strides_0 = const()[name = string("op_7782_strides_0"), val = tensor([1, 1])]; tensor var_7782_pad_0 = const()[name = string("op_7782_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7782_dilations_0 = const()[name = string("op_7782_dilations_0"), val = tensor([1, 1])]; int32 var_7782_groups_0 = const()[name = string("op_7782_groups_0"), val = int32(1)]; tensor var_7782 = conv(dilations = var_7782_dilations_0, groups = var_7782_groups_0, pad = var_7782_pad_0, pad_type = var_7782_pad_type_0, strides = var_7782_strides_0, weight = layers_14_self_attn_q_proj_weight_quantized, x = input_283)[name = string("op_7782")]; tensor var_7787 = const()[name = string("op_7787"), val = tensor([1, 4, 256, 32])]; tensor var_7788 = reshape(shape = var_7787, x = var_7782)[name = string("op_7788")]; tensor var_7793 = const()[name = string("op_7793"), val = tensor([0, 1, 3, 2])]; int32 var_7806 = const()[name = string("op_7806"), val = int32(-1)]; fp16 const_198_promoted_to_fp16 = const()[name = string("const_198_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_85 = transpose(perm = var_7793, x = var_7788)[name = string("transpose_32")]; tensor var_7812_cast_fp16 = mul(x = q_85, y = const_198_promoted_to_fp16)[name = string("op_7812_cast_fp16")]; bool input_285_interleave_0 = const()[name = string("input_285_interleave_0"), val = bool(false)]; tensor input_285_cast_fp16 = concat(axis = var_7806, interleave = input_285_interleave_0, values = (q_85, var_7812_cast_fp16))[name = string("input_285_cast_fp16")]; tensor normed_399_axes_0 = const()[name = string("normed_399_axes_0"), val = tensor([-1])]; fp16 var_7804_to_fp16 = const()[name = string("op_7804_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_399_cast_fp16 = layer_norm(axes = normed_399_axes_0, epsilon = var_7804_to_fp16, x = input_285_cast_fp16)[name = string("normed_399_cast_fp16")]; tensor var_7817_split_sizes_0 = const()[name = string("op_7817_split_sizes_0"), val = tensor([256, 256])]; int32 var_7817_axis_0 = const()[name = string("op_7817_axis_0"), val = int32(-1)]; tensor var_7817_cast_fp16_0, tensor var_7817_cast_fp16_1 = split(axis = var_7817_axis_0, split_sizes = var_7817_split_sizes_0, x = normed_399_cast_fp16)[name = string("op_7817_cast_fp16")]; tensor var_7821_to_fp16 = const()[name = string("op_7821_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270524928)))]; tensor out_171_cast_fp16 = mul(x = var_7817_cast_fp16_0, y = var_7821_to_fp16)[name = string("out_171_cast_fp16")]; string var_7834_pad_type_0 = const()[name = string("op_7834_pad_type_0"), val = string("valid")]; tensor var_7834_strides_0 = const()[name = string("op_7834_strides_0"), val = tensor([1, 1])]; tensor var_7834_pad_0 = const()[name = string("op_7834_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7834_dilations_0 = const()[name = string("op_7834_dilations_0"), val = tensor([1, 1])]; int32 var_7834_groups_0 = const()[name = string("op_7834_groups_0"), val = int32(1)]; tensor var_7834 = conv(dilations = var_7834_dilations_0, groups = var_7834_groups_0, pad = var_7834_pad_0, pad_type = var_7834_pad_type_0, strides = var_7834_strides_0, weight = layers_14_self_attn_k_proj_weight_quantized, x = input_283)[name = string("op_7834")]; tensor var_7839 = const()[name = string("op_7839"), val = tensor([1, 1, 256, 32])]; tensor var_7840 = reshape(shape = var_7839, x = var_7834)[name = string("op_7840")]; tensor var_7845 = const()[name = string("op_7845"), val = tensor([0, 1, 3, 2])]; int32 var_7858 = const()[name = string("op_7858"), val = int32(-1)]; fp16 const_200_promoted_to_fp16 = const()[name = string("const_200_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_85 = transpose(perm = var_7845, x = var_7840)[name = string("transpose_31")]; tensor var_7864_cast_fp16 = mul(x = k_85, y = const_200_promoted_to_fp16)[name = string("op_7864_cast_fp16")]; bool input_287_interleave_0 = const()[name = string("input_287_interleave_0"), val = bool(false)]; tensor input_287_cast_fp16 = concat(axis = var_7858, interleave = input_287_interleave_0, values = (k_85, var_7864_cast_fp16))[name = string("input_287_cast_fp16")]; tensor normed_403_axes_0 = const()[name = string("normed_403_axes_0"), val = tensor([-1])]; fp16 var_7856_to_fp16 = const()[name = string("op_7856_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_403_cast_fp16 = layer_norm(axes = normed_403_axes_0, epsilon = var_7856_to_fp16, x = input_287_cast_fp16)[name = string("normed_403_cast_fp16")]; tensor var_7869_split_sizes_0 = const()[name = string("op_7869_split_sizes_0"), val = tensor([256, 256])]; int32 var_7869_axis_0 = const()[name = string("op_7869_axis_0"), val = int32(-1)]; tensor var_7869_cast_fp16_0, tensor var_7869_cast_fp16_1 = split(axis = var_7869_axis_0, split_sizes = var_7869_split_sizes_0, x = normed_403_cast_fp16)[name = string("op_7869_cast_fp16")]; tensor var_7873_to_fp16 = const()[name = string("op_7873_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270525504)))]; tensor out_173_cast_fp16 = mul(x = var_7869_cast_fp16_0, y = var_7873_to_fp16)[name = string("out_173_cast_fp16")]; string var_7886_pad_type_0 = const()[name = string("op_7886_pad_type_0"), val = string("valid")]; tensor var_7886_strides_0 = const()[name = string("op_7886_strides_0"), val = tensor([1, 1])]; tensor var_7886_pad_0 = const()[name = string("op_7886_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7886_dilations_0 = const()[name = string("op_7886_dilations_0"), val = tensor([1, 1])]; int32 var_7886_groups_0 = const()[name = string("op_7886_groups_0"), val = int32(1)]; tensor var_7886 = conv(dilations = var_7886_dilations_0, groups = var_7886_groups_0, pad = var_7886_pad_0, pad_type = var_7886_pad_type_0, strides = var_7886_strides_0, weight = layers_14_self_attn_v_proj_weight_quantized, x = input_283)[name = string("op_7886")]; tensor var_7891 = const()[name = string("op_7891"), val = tensor([1, 1, 256, 32])]; tensor var_7892 = reshape(shape = var_7891, x = var_7886)[name = string("op_7892")]; tensor var_7899 = mul(x = out_171_cast_fp16, y = cos_1)[name = string("op_7899")]; tensor var_7900_split_sizes_0 = const()[name = string("op_7900_split_sizes_0"), val = tensor([128, 128])]; int32 var_7900_axis_0 = const()[name = string("op_7900_axis_0"), val = int32(-1)]; tensor var_7900_0, tensor var_7900_1 = split(axis = var_7900_axis_0, split_sizes = var_7900_split_sizes_0, x = out_171_cast_fp16)[name = string("op_7900")]; fp16 const_202_promoted = const()[name = string("const_202_promoted"), val = fp16(-0x1p+0)]; tensor var_7902 = mul(x = var_7900_1, y = const_202_promoted)[name = string("op_7902")]; int32 var_7904 = const()[name = string("op_7904"), val = int32(-1)]; bool var_7905_interleave_0 = const()[name = string("op_7905_interleave_0"), val = bool(false)]; tensor var_7905 = concat(axis = var_7904, interleave = var_7905_interleave_0, values = (var_7902, var_7900_0))[name = string("op_7905")]; tensor var_7906 = mul(x = var_7905, y = sin_1)[name = string("op_7906")]; tensor q_89 = add(x = var_7899, y = var_7906)[name = string("q_89")]; tensor var_7909 = mul(x = out_173_cast_fp16, y = cos_1)[name = string("op_7909")]; tensor var_7910_split_sizes_0 = const()[name = string("op_7910_split_sizes_0"), val = tensor([128, 128])]; int32 var_7910_axis_0 = const()[name = string("op_7910_axis_0"), val = int32(-1)]; tensor var_7910_0, tensor var_7910_1 = split(axis = var_7910_axis_0, split_sizes = var_7910_split_sizes_0, x = out_173_cast_fp16)[name = string("op_7910")]; fp16 const_203_promoted = const()[name = string("const_203_promoted"), val = fp16(-0x1p+0)]; tensor var_7912 = mul(x = var_7910_1, y = const_203_promoted)[name = string("op_7912")]; int32 var_7914 = const()[name = string("op_7914"), val = int32(-1)]; bool var_7915_interleave_0 = const()[name = string("op_7915_interleave_0"), val = bool(false)]; tensor var_7915 = concat(axis = var_7914, interleave = var_7915_interleave_0, values = (var_7912, var_7910_0))[name = string("op_7915")]; tensor var_7916 = mul(x = var_7915, y = sin_1)[name = string("op_7916")]; tensor k_89 = add(x = var_7909, y = var_7916)[name = string("k_89")]; tensor var_7921_begin_0 = const()[name = string("op_7921_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_7921_end_0 = const()[name = string("op_7921_end_0"), val = tensor([15, 1, 2048, 256])]; tensor var_7921_end_mask_0 = const()[name = string("op_7921_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7921_squeeze_mask_0 = const()[name = string("op_7921_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_7921_cast_fp16 = slice_by_index(begin = var_7921_begin_0, end = var_7921_end_0, end_mask = var_7921_end_mask_0, squeeze_mask = var_7921_squeeze_mask_0, x = coreml_update_state_63)[name = string("op_7921_cast_fp16")]; tensor K_cache_29_axes_0 = const()[name = string("K_cache_29_axes_0"), val = tensor([0])]; tensor K_cache_29_cast_fp16 = expand_dims(axes = K_cache_29_axes_0, x = var_7921_cast_fp16)[name = string("K_cache_29_cast_fp16")]; tensor var_7926_begin_0 = const()[name = string("op_7926_begin_0"), val = tensor([32, 0, 0, 0])]; tensor var_7926_end_0 = const()[name = string("op_7926_end_0"), val = tensor([33, 1, 2048, 256])]; tensor var_7926_end_mask_0 = const()[name = string("op_7926_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7926_squeeze_mask_0 = const()[name = string("op_7926_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_7926_cast_fp16 = slice_by_index(begin = var_7926_begin_0, end = var_7926_end_0, end_mask = var_7926_end_mask_0, squeeze_mask = var_7926_squeeze_mask_0, x = coreml_update_state_63)[name = string("op_7926_cast_fp16")]; tensor V_cache_29_axes_0 = const()[name = string("V_cache_29_axes_0"), val = tensor([0])]; tensor V_cache_29_cast_fp16 = expand_dims(axes = V_cache_29_axes_0, x = var_7926_cast_fp16)[name = string("V_cache_29_cast_fp16")]; bool k_increment_29_transpose_x_0 = const()[name = string("k_increment_29_transpose_x_0"), val = bool(false)]; bool k_increment_29_transpose_y_0 = const()[name = string("k_increment_29_transpose_y_0"), val = bool(false)]; tensor k_increment_29 = matmul(transpose_x = k_increment_29_transpose_x_0, transpose_y = k_increment_29_transpose_y_0, x = update_mask, y = k_89)[name = string("k_increment_29")]; bool v_increment_29_transpose_x_1 = const()[name = string("v_increment_29_transpose_x_1"), val = bool(false)]; bool v_increment_29_transpose_y_1 = const()[name = string("v_increment_29_transpose_y_1"), val = bool(true)]; tensor v_increment_29 = matmul(transpose_x = v_increment_29_transpose_x_1, transpose_y = v_increment_29_transpose_y_1, x = update_mask, y = var_7892)[name = string("v_increment_29")]; tensor var_7944_cast_fp16 = mul(x = K_cache_29_cast_fp16, y = var_1125_cast_fp16)[name = string("op_7944_cast_fp16")]; tensor K_new_29_cast_fp16 = add(x = var_7944_cast_fp16, y = k_increment_29)[name = string("K_new_29_cast_fp16")]; tensor var_7950_cast_fp16 = mul(x = V_cache_29_cast_fp16, y = var_1125_cast_fp16)[name = string("op_7950_cast_fp16")]; tensor V_new_29_cast_fp16 = add(x = var_7950_cast_fp16, y = v_increment_29)[name = string("V_new_29_cast_fp16")]; tensor var_7954_axes_0 = const()[name = string("op_7954_axes_0"), val = tensor([0])]; tensor var_7954_cast_fp16 = squeeze(axes = var_7954_axes_0, x = K_new_29_cast_fp16)[name = string("op_7954_cast_fp16")]; tensor concat_56 = const()[name = string("concat_56"), val = tensor([14, 0, 0, 0])]; tensor concat_57 = const()[name = string("concat_57"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_29_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_29_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_29_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_56, begin_mask = kv_cache_0_internal_tensor_assign_29_begin_mask_0, end = concat_57, end_mask = kv_cache_0_internal_tensor_assign_29_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_29_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_29_stride_0, update = var_7954_cast_fp16, x = coreml_update_state_63)[name = string("kv_cache_0_internal_tensor_assign_29_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_29_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_64_write_state")]; tensor coreml_update_state_64 = read_state(input = kv_cache_0)[name = string("coreml_update_state_64")]; tensor var_7961_axes_0 = const()[name = string("op_7961_axes_0"), val = tensor([0])]; tensor var_7961_cast_fp16 = squeeze(axes = var_7961_axes_0, x = V_new_29_cast_fp16)[name = string("op_7961_cast_fp16")]; tensor concat_58 = const()[name = string("concat_58"), val = tensor([32, 0, 0, 0])]; tensor concat_59 = const()[name = string("concat_59"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_30_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_30_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_30_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_58, begin_mask = kv_cache_0_internal_tensor_assign_30_begin_mask_0, end = concat_59, end_mask = kv_cache_0_internal_tensor_assign_30_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_30_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_30_stride_0, update = var_7961_cast_fp16, x = coreml_update_state_64)[name = string("kv_cache_0_internal_tensor_assign_30_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_30_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_65_write_state")]; tensor coreml_update_state_65 = read_state(input = kv_cache_0)[name = string("coreml_update_state_65")]; tensor hidden_states_113_axes_0 = const()[name = string("hidden_states_113_axes_0"), val = tensor([2])]; tensor hidden_states_113_cast_fp16 = expand_dims(axes = hidden_states_113_axes_0, x = K_new_29_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; tensor var_7974 = const()[name = string("op_7974"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_115_cast_fp16 = tile(reps = var_7974, x = hidden_states_113_cast_fp16)[name = string("hidden_states_115_cast_fp16")]; tensor var_7980 = const()[name = string("op_7980"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_29_cast_fp16 = reshape(shape = var_7980, x = hidden_states_115_cast_fp16)[name = string("K_expanded_29_cast_fp16")]; tensor hidden_states_117_axes_0 = const()[name = string("hidden_states_117_axes_0"), val = tensor([2])]; tensor hidden_states_117_cast_fp16 = expand_dims(axes = hidden_states_117_axes_0, x = V_new_29_cast_fp16)[name = string("hidden_states_117_cast_fp16")]; tensor var_7989 = const()[name = string("op_7989"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_119_cast_fp16 = tile(reps = var_7989, x = hidden_states_117_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; tensor var_7995 = const()[name = string("op_7995"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_29_cast_fp16 = reshape(shape = var_7995, x = hidden_states_119_cast_fp16)[name = string("V_expanded_29_cast_fp16")]; bool var_8010_transpose_x_1 = const()[name = string("op_8010_transpose_x_1"), val = bool(false)]; bool var_8010_transpose_y_1 = const()[name = string("op_8010_transpose_y_1"), val = bool(true)]; tensor var_8010_cast_fp16 = matmul(transpose_x = var_8010_transpose_x_1, transpose_y = var_8010_transpose_y_1, x = q_89, y = K_expanded_29_cast_fp16)[name = string("op_8010_cast_fp16")]; fp16 var_8011_to_fp16 = const()[name = string("op_8011_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_85_cast_fp16 = mul(x = var_8010_cast_fp16, y = var_8011_to_fp16)[name = string("attn_weights_85_cast_fp16")]; tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = causal_mask)[name = string("attn_weights_87_cast_fp16")]; int32 var_8020 = const()[name = string("op_8020"), val = int32(-1)]; tensor var_8022_cast_fp16 = softmax(axis = var_8020, x = attn_weights_87_cast_fp16)[name = string("op_8022_cast_fp16")]; bool var_8038_transpose_x_0 = const()[name = string("op_8038_transpose_x_0"), val = bool(false)]; bool var_8038_transpose_y_0 = const()[name = string("op_8038_transpose_y_0"), val = bool(false)]; tensor var_8038_cast_fp16 = matmul(transpose_x = var_8038_transpose_x_0, transpose_y = var_8038_transpose_y_0, x = var_8022_cast_fp16, y = V_expanded_29_cast_fp16)[name = string("op_8038_cast_fp16")]; tensor var_8048 = const()[name = string("op_8048"), val = tensor([0, 2, 1, 3])]; tensor var_8055 = const()[name = string("op_8055"), val = tensor([1, 32, 1024])]; tensor var_8049 = transpose(perm = var_8048, x = var_8038_cast_fp16)[name = string("transpose_30")]; tensor attn_output_87 = reshape(shape = var_8055, x = var_8049)[name = string("attn_output_87")]; tensor var_8060 = const()[name = string("op_8060"), val = tensor([0, 2, 1])]; tensor squeeze_14_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270526080))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271181504))))[name = string("squeeze_14_quantized")]; string var_8076_pad_type_0 = const()[name = string("op_8076_pad_type_0"), val = string("valid")]; int32 var_8076_groups_0 = const()[name = string("op_8076_groups_0"), val = int32(1)]; tensor var_8076_strides_0 = const()[name = string("op_8076_strides_0"), val = tensor([1])]; tensor var_8076_pad_0 = const()[name = string("op_8076_pad_0"), val = tensor([0, 0])]; tensor var_8076_dilations_0 = const()[name = string("op_8076_dilations_0"), val = tensor([1])]; tensor var_8061 = transpose(perm = var_8060, x = attn_output_87)[name = string("transpose_29")]; tensor var_8076 = conv(dilations = var_8076_dilations_0, groups = var_8076_groups_0, pad = var_8076_pad_0, pad_type = var_8076_pad_type_0, strides = var_8076_strides_0, weight = squeeze_14_quantized, x = var_8061)[name = string("op_8076")]; tensor var_8080 = const()[name = string("op_8080"), val = tensor([0, 2, 1])]; int32 var_8087 = const()[name = string("op_8087"), val = int32(-1)]; fp16 const_204_promoted_to_fp16 = const()[name = string("const_204_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_233 = transpose(perm = var_8080, x = var_8076)[name = string("transpose_28")]; tensor var_8093_cast_fp16 = mul(x = x_233, y = const_204_promoted_to_fp16)[name = string("op_8093_cast_fp16")]; bool input_291_interleave_0 = const()[name = string("input_291_interleave_0"), val = bool(false)]; tensor input_291_cast_fp16 = concat(axis = var_8087, interleave = input_291_interleave_0, values = (x_233, var_8093_cast_fp16))[name = string("input_291_cast_fp16")]; tensor normed_407_axes_0 = const()[name = string("normed_407_axes_0"), val = tensor([-1])]; fp16 var_8085_to_fp16 = const()[name = string("op_8085_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_407_cast_fp16 = layer_norm(axes = normed_407_axes_0, epsilon = var_8085_to_fp16, x = input_291_cast_fp16)[name = string("normed_407_cast_fp16")]; tensor var_8098_split_sizes_0 = const()[name = string("op_8098_split_sizes_0"), val = tensor([640, 640])]; int32 var_8098_axis_0 = const()[name = string("op_8098_axis_0"), val = int32(-1)]; tensor var_8098_cast_fp16_0, tensor var_8098_cast_fp16_1 = split(axis = var_8098_axis_0, split_sizes = var_8098_split_sizes_0, x = normed_407_cast_fp16)[name = string("op_8098_cast_fp16")]; tensor var_8102_to_fp16 = const()[name = string("op_8102_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271182848)))]; tensor out_175_cast_fp16 = mul(x = var_8098_cast_fp16_0, y = var_8102_to_fp16)[name = string("out_175_cast_fp16")]; tensor x_235_cast_fp16 = add(x = x_225_cast_fp16, y = out_175_cast_fp16)[name = string("x_235_cast_fp16")]; int32 var_8116 = const()[name = string("op_8116"), val = int32(-1)]; fp16 const_206_promoted_to_fp16 = const()[name = string("const_206_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8122_cast_fp16 = mul(x = x_235_cast_fp16, y = const_206_promoted_to_fp16)[name = string("op_8122_cast_fp16")]; bool input_293_interleave_0 = const()[name = string("input_293_interleave_0"), val = bool(false)]; tensor input_293_cast_fp16 = concat(axis = var_8116, interleave = input_293_interleave_0, values = (x_235_cast_fp16, var_8122_cast_fp16))[name = string("input_293_cast_fp16")]; tensor normed_411_axes_0 = const()[name = string("normed_411_axes_0"), val = tensor([-1])]; fp16 var_8114_to_fp16 = const()[name = string("op_8114_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_411_cast_fp16 = layer_norm(axes = normed_411_axes_0, epsilon = var_8114_to_fp16, x = input_293_cast_fp16)[name = string("normed_411_cast_fp16")]; tensor var_8127_split_sizes_0 = const()[name = string("op_8127_split_sizes_0"), val = tensor([640, 640])]; int32 var_8127_axis_0 = const()[name = string("op_8127_axis_0"), val = int32(-1)]; tensor var_8127_cast_fp16_0, tensor var_8127_cast_fp16_1 = split(axis = var_8127_axis_0, split_sizes = var_8127_split_sizes_0, x = normed_411_cast_fp16)[name = string("op_8127_cast_fp16")]; tensor var_8131_to_fp16 = const()[name = string("op_8131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271184192)))]; tensor out_177_cast_fp16 = mul(x = var_8127_cast_fp16_0, y = var_8131_to_fp16)[name = string("out_177_cast_fp16")]; tensor var_8145 = const()[name = string("op_8145"), val = tensor([0, 2, 1])]; tensor input_295_axes_0 = const()[name = string("input_295_axes_0"), val = tensor([2])]; tensor var_8146 = transpose(perm = var_8145, x = out_177_cast_fp16)[name = string("transpose_27")]; tensor input_295 = expand_dims(axes = input_295_axes_0, x = var_8146)[name = string("input_295")]; string gate_57_pad_type_0 = const()[name = string("gate_57_pad_type_0"), val = string("valid")]; tensor gate_57_strides_0 = const()[name = string("gate_57_strides_0"), val = tensor([1, 1])]; tensor gate_57_pad_0 = const()[name = string("gate_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_57_dilations_0 = const()[name = string("gate_57_dilations_0"), val = tensor([1, 1])]; int32 gate_57_groups_0 = const()[name = string("gate_57_groups_0"), val = int32(1)]; tensor gate_57 = conv(dilations = gate_57_dilations_0, groups = gate_57_groups_0, pad = gate_57_pad_0, pad_type = gate_57_pad_type_0, strides = gate_57_strides_0, weight = layers_14_mlp_gate_proj_weight_quantized, x = input_295)[name = string("gate_57")]; string up_29_pad_type_0 = const()[name = string("up_29_pad_type_0"), val = string("valid")]; tensor up_29_strides_0 = const()[name = string("up_29_strides_0"), val = tensor([1, 1])]; tensor up_29_pad_0 = const()[name = string("up_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_29_dilations_0 = const()[name = string("up_29_dilations_0"), val = tensor([1, 1])]; int32 up_29_groups_0 = const()[name = string("up_29_groups_0"), val = int32(1)]; tensor up_29 = conv(dilations = up_29_dilations_0, groups = up_29_groups_0, pad = up_29_pad_0, pad_type = up_29_pad_type_0, strides = up_29_strides_0, weight = layers_14_mlp_up_proj_weight_quantized, x = input_295)[name = string("up_29")]; string gate_59_mode_0 = const()[name = string("gate_59_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_59 = gelu(mode = gate_59_mode_0, x = gate_57)[name = string("gate_59")]; tensor input_297 = mul(x = gate_59, y = up_29)[name = string("input_297")]; string var_8184_pad_type_0 = const()[name = string("op_8184_pad_type_0"), val = string("valid")]; tensor var_8184_strides_0 = const()[name = string("op_8184_strides_0"), val = tensor([1, 1])]; tensor var_8184_pad_0 = const()[name = string("op_8184_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8184_dilations_0 = const()[name = string("op_8184_dilations_0"), val = tensor([1, 1])]; int32 var_8184_groups_0 = const()[name = string("op_8184_groups_0"), val = int32(1)]; tensor var_8184 = conv(dilations = var_8184_dilations_0, groups = var_8184_groups_0, pad = var_8184_pad_0, pad_type = var_8184_pad_type_0, strides = var_8184_strides_0, weight = layers_14_mlp_down_proj_weight_quantized, x = input_297)[name = string("op_8184")]; tensor var_8186_axes_0 = const()[name = string("op_8186_axes_0"), val = tensor([2])]; tensor var_8186 = squeeze(axes = var_8186_axes_0, x = var_8184)[name = string("op_8186")]; tensor var_8190 = const()[name = string("op_8190"), val = tensor([0, 2, 1])]; int32 var_8197 = const()[name = string("op_8197"), val = int32(-1)]; fp16 const_208_promoted_to_fp16 = const()[name = string("const_208_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_239 = transpose(perm = var_8190, x = var_8186)[name = string("transpose_26")]; tensor var_8203_cast_fp16 = mul(x = x_239, y = const_208_promoted_to_fp16)[name = string("op_8203_cast_fp16")]; bool input_299_interleave_0 = const()[name = string("input_299_interleave_0"), val = bool(false)]; tensor input_299_cast_fp16 = concat(axis = var_8197, interleave = input_299_interleave_0, values = (x_239, var_8203_cast_fp16))[name = string("input_299_cast_fp16")]; tensor normed_417_axes_0 = const()[name = string("normed_417_axes_0"), val = tensor([-1])]; fp16 var_8195_to_fp16 = const()[name = string("op_8195_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_417_cast_fp16 = layer_norm(axes = normed_417_axes_0, epsilon = var_8195_to_fp16, x = input_299_cast_fp16)[name = string("normed_417_cast_fp16")]; tensor var_8208_split_sizes_0 = const()[name = string("op_8208_split_sizes_0"), val = tensor([640, 640])]; int32 var_8208_axis_0 = const()[name = string("op_8208_axis_0"), val = int32(-1)]; tensor var_8208_cast_fp16_0, tensor var_8208_cast_fp16_1 = split(axis = var_8208_axis_0, split_sizes = var_8208_split_sizes_0, x = normed_417_cast_fp16)[name = string("op_8208_cast_fp16")]; tensor var_8212_to_fp16 = const()[name = string("op_8212_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271185536)))]; tensor out_179_cast_fp16 = mul(x = var_8208_cast_fp16_0, y = var_8212_to_fp16)[name = string("out_179_cast_fp16")]; tensor x_241_cast_fp16 = add(x = x_235_cast_fp16, y = out_179_cast_fp16)[name = string("x_241_cast_fp16")]; int32 var_8226 = const()[name = string("op_8226"), val = int32(-1)]; fp16 const_210_promoted_to_fp16 = const()[name = string("const_210_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8232_cast_fp16 = mul(x = x_241_cast_fp16, y = const_210_promoted_to_fp16)[name = string("op_8232_cast_fp16")]; bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; tensor input_301_cast_fp16 = concat(axis = var_8226, interleave = input_301_interleave_0, values = (x_241_cast_fp16, var_8232_cast_fp16))[name = string("input_301_cast_fp16")]; tensor normed_421_axes_0 = const()[name = string("normed_421_axes_0"), val = tensor([-1])]; fp16 var_8224_to_fp16 = const()[name = string("op_8224_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_421_cast_fp16 = layer_norm(axes = normed_421_axes_0, epsilon = var_8224_to_fp16, x = input_301_cast_fp16)[name = string("normed_421_cast_fp16")]; tensor var_8237_split_sizes_0 = const()[name = string("op_8237_split_sizes_0"), val = tensor([640, 640])]; int32 var_8237_axis_0 = const()[name = string("op_8237_axis_0"), val = int32(-1)]; tensor var_8237_cast_fp16_0, tensor var_8237_cast_fp16_1 = split(axis = var_8237_axis_0, split_sizes = var_8237_split_sizes_0, x = normed_421_cast_fp16)[name = string("op_8237_cast_fp16")]; tensor var_8241_to_fp16 = const()[name = string("op_8241_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271186880)))]; tensor out_181_cast_fp16 = mul(x = var_8237_cast_fp16_0, y = var_8241_to_fp16)[name = string("out_181_cast_fp16")]; tensor var_8255 = const()[name = string("op_8255"), val = tensor([0, 2, 1])]; tensor input_303_axes_0 = const()[name = string("input_303_axes_0"), val = tensor([2])]; tensor var_8256 = transpose(perm = var_8255, x = out_181_cast_fp16)[name = string("transpose_25")]; tensor input_303 = expand_dims(axes = input_303_axes_0, x = var_8256)[name = string("input_303")]; string var_8269_pad_type_0 = const()[name = string("op_8269_pad_type_0"), val = string("valid")]; tensor var_8269_strides_0 = const()[name = string("op_8269_strides_0"), val = tensor([1, 1])]; tensor var_8269_pad_0 = const()[name = string("op_8269_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8269_dilations_0 = const()[name = string("op_8269_dilations_0"), val = tensor([1, 1])]; int32 var_8269_groups_0 = const()[name = string("op_8269_groups_0"), val = int32(1)]; tensor var_8269 = conv(dilations = var_8269_dilations_0, groups = var_8269_groups_0, pad = var_8269_pad_0, pad_type = var_8269_pad_type_0, strides = var_8269_strides_0, weight = layers_15_self_attn_q_proj_weight_quantized, x = input_303)[name = string("op_8269")]; tensor var_8274 = const()[name = string("op_8274"), val = tensor([1, 4, 256, 32])]; tensor var_8275 = reshape(shape = var_8274, x = var_8269)[name = string("op_8275")]; tensor var_8280 = const()[name = string("op_8280"), val = tensor([0, 1, 3, 2])]; int32 var_8293 = const()[name = string("op_8293"), val = int32(-1)]; fp16 const_212_promoted_to_fp16 = const()[name = string("const_212_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_91 = transpose(perm = var_8280, x = var_8275)[name = string("transpose_24")]; tensor var_8299_cast_fp16 = mul(x = q_91, y = const_212_promoted_to_fp16)[name = string("op_8299_cast_fp16")]; bool input_305_interleave_0 = const()[name = string("input_305_interleave_0"), val = bool(false)]; tensor input_305_cast_fp16 = concat(axis = var_8293, interleave = input_305_interleave_0, values = (q_91, var_8299_cast_fp16))[name = string("input_305_cast_fp16")]; tensor normed_427_axes_0 = const()[name = string("normed_427_axes_0"), val = tensor([-1])]; fp16 var_8291_to_fp16 = const()[name = string("op_8291_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_427_cast_fp16 = layer_norm(axes = normed_427_axes_0, epsilon = var_8291_to_fp16, x = input_305_cast_fp16)[name = string("normed_427_cast_fp16")]; tensor var_8304_split_sizes_0 = const()[name = string("op_8304_split_sizes_0"), val = tensor([256, 256])]; int32 var_8304_axis_0 = const()[name = string("op_8304_axis_0"), val = int32(-1)]; tensor var_8304_cast_fp16_0, tensor var_8304_cast_fp16_1 = split(axis = var_8304_axis_0, split_sizes = var_8304_split_sizes_0, x = normed_427_cast_fp16)[name = string("op_8304_cast_fp16")]; tensor var_8308_to_fp16 = const()[name = string("op_8308_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271188224)))]; tensor out_183_cast_fp16 = mul(x = var_8304_cast_fp16_0, y = var_8308_to_fp16)[name = string("out_183_cast_fp16")]; string var_8321_pad_type_0 = const()[name = string("op_8321_pad_type_0"), val = string("valid")]; tensor var_8321_strides_0 = const()[name = string("op_8321_strides_0"), val = tensor([1, 1])]; tensor var_8321_pad_0 = const()[name = string("op_8321_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8321_dilations_0 = const()[name = string("op_8321_dilations_0"), val = tensor([1, 1])]; int32 var_8321_groups_0 = const()[name = string("op_8321_groups_0"), val = int32(1)]; tensor var_8321 = conv(dilations = var_8321_dilations_0, groups = var_8321_groups_0, pad = var_8321_pad_0, pad_type = var_8321_pad_type_0, strides = var_8321_strides_0, weight = layers_15_self_attn_k_proj_weight_quantized, x = input_303)[name = string("op_8321")]; tensor var_8326 = const()[name = string("op_8326"), val = tensor([1, 1, 256, 32])]; tensor var_8327 = reshape(shape = var_8326, x = var_8321)[name = string("op_8327")]; tensor var_8332 = const()[name = string("op_8332"), val = tensor([0, 1, 3, 2])]; int32 var_8345 = const()[name = string("op_8345"), val = int32(-1)]; fp16 const_214_promoted_to_fp16 = const()[name = string("const_214_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_91 = transpose(perm = var_8332, x = var_8327)[name = string("transpose_23")]; tensor var_8351_cast_fp16 = mul(x = k_91, y = const_214_promoted_to_fp16)[name = string("op_8351_cast_fp16")]; bool input_307_interleave_0 = const()[name = string("input_307_interleave_0"), val = bool(false)]; tensor input_307_cast_fp16 = concat(axis = var_8345, interleave = input_307_interleave_0, values = (k_91, var_8351_cast_fp16))[name = string("input_307_cast_fp16")]; tensor normed_431_axes_0 = const()[name = string("normed_431_axes_0"), val = tensor([-1])]; fp16 var_8343_to_fp16 = const()[name = string("op_8343_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_431_cast_fp16 = layer_norm(axes = normed_431_axes_0, epsilon = var_8343_to_fp16, x = input_307_cast_fp16)[name = string("normed_431_cast_fp16")]; tensor var_8356_split_sizes_0 = const()[name = string("op_8356_split_sizes_0"), val = tensor([256, 256])]; int32 var_8356_axis_0 = const()[name = string("op_8356_axis_0"), val = int32(-1)]; tensor var_8356_cast_fp16_0, tensor var_8356_cast_fp16_1 = split(axis = var_8356_axis_0, split_sizes = var_8356_split_sizes_0, x = normed_431_cast_fp16)[name = string("op_8356_cast_fp16")]; tensor var_8360_to_fp16 = const()[name = string("op_8360_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271188800)))]; tensor out_185_cast_fp16 = mul(x = var_8356_cast_fp16_0, y = var_8360_to_fp16)[name = string("out_185_cast_fp16")]; string var_8373_pad_type_0 = const()[name = string("op_8373_pad_type_0"), val = string("valid")]; tensor var_8373_strides_0 = const()[name = string("op_8373_strides_0"), val = tensor([1, 1])]; tensor var_8373_pad_0 = const()[name = string("op_8373_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8373_dilations_0 = const()[name = string("op_8373_dilations_0"), val = tensor([1, 1])]; int32 var_8373_groups_0 = const()[name = string("op_8373_groups_0"), val = int32(1)]; tensor var_8373 = conv(dilations = var_8373_dilations_0, groups = var_8373_groups_0, pad = var_8373_pad_0, pad_type = var_8373_pad_type_0, strides = var_8373_strides_0, weight = layers_15_self_attn_v_proj_weight_quantized, x = input_303)[name = string("op_8373")]; tensor var_8378 = const()[name = string("op_8378"), val = tensor([1, 1, 256, 32])]; tensor var_8379 = reshape(shape = var_8378, x = var_8373)[name = string("op_8379")]; tensor var_8386 = mul(x = out_183_cast_fp16, y = cos_1)[name = string("op_8386")]; tensor var_8387_split_sizes_0 = const()[name = string("op_8387_split_sizes_0"), val = tensor([128, 128])]; int32 var_8387_axis_0 = const()[name = string("op_8387_axis_0"), val = int32(-1)]; tensor var_8387_0, tensor var_8387_1 = split(axis = var_8387_axis_0, split_sizes = var_8387_split_sizes_0, x = out_183_cast_fp16)[name = string("op_8387")]; fp16 const_216_promoted = const()[name = string("const_216_promoted"), val = fp16(-0x1p+0)]; tensor var_8389 = mul(x = var_8387_1, y = const_216_promoted)[name = string("op_8389")]; int32 var_8391 = const()[name = string("op_8391"), val = int32(-1)]; bool var_8392_interleave_0 = const()[name = string("op_8392_interleave_0"), val = bool(false)]; tensor var_8392 = concat(axis = var_8391, interleave = var_8392_interleave_0, values = (var_8389, var_8387_0))[name = string("op_8392")]; tensor var_8393 = mul(x = var_8392, y = sin_1)[name = string("op_8393")]; tensor q_95 = add(x = var_8386, y = var_8393)[name = string("q_95")]; tensor var_8396 = mul(x = out_185_cast_fp16, y = cos_1)[name = string("op_8396")]; tensor var_8397_split_sizes_0 = const()[name = string("op_8397_split_sizes_0"), val = tensor([128, 128])]; int32 var_8397_axis_0 = const()[name = string("op_8397_axis_0"), val = int32(-1)]; tensor var_8397_0, tensor var_8397_1 = split(axis = var_8397_axis_0, split_sizes = var_8397_split_sizes_0, x = out_185_cast_fp16)[name = string("op_8397")]; fp16 const_217_promoted = const()[name = string("const_217_promoted"), val = fp16(-0x1p+0)]; tensor var_8399 = mul(x = var_8397_1, y = const_217_promoted)[name = string("op_8399")]; int32 var_8401 = const()[name = string("op_8401"), val = int32(-1)]; bool var_8402_interleave_0 = const()[name = string("op_8402_interleave_0"), val = bool(false)]; tensor var_8402 = concat(axis = var_8401, interleave = var_8402_interleave_0, values = (var_8399, var_8397_0))[name = string("op_8402")]; tensor var_8403 = mul(x = var_8402, y = sin_1)[name = string("op_8403")]; tensor k_95 = add(x = var_8396, y = var_8403)[name = string("k_95")]; tensor var_8408_begin_0 = const()[name = string("op_8408_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_8408_end_0 = const()[name = string("op_8408_end_0"), val = tensor([16, 1, 2048, 256])]; tensor var_8408_end_mask_0 = const()[name = string("op_8408_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8408_squeeze_mask_0 = const()[name = string("op_8408_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_8408_cast_fp16 = slice_by_index(begin = var_8408_begin_0, end = var_8408_end_0, end_mask = var_8408_end_mask_0, squeeze_mask = var_8408_squeeze_mask_0, x = coreml_update_state_65)[name = string("op_8408_cast_fp16")]; tensor K_cache_31_axes_0 = const()[name = string("K_cache_31_axes_0"), val = tensor([0])]; tensor K_cache_31_cast_fp16 = expand_dims(axes = K_cache_31_axes_0, x = var_8408_cast_fp16)[name = string("K_cache_31_cast_fp16")]; tensor var_8413_begin_0 = const()[name = string("op_8413_begin_0"), val = tensor([33, 0, 0, 0])]; tensor var_8413_end_0 = const()[name = string("op_8413_end_0"), val = tensor([34, 1, 2048, 256])]; tensor var_8413_end_mask_0 = const()[name = string("op_8413_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8413_squeeze_mask_0 = const()[name = string("op_8413_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_8413_cast_fp16 = slice_by_index(begin = var_8413_begin_0, end = var_8413_end_0, end_mask = var_8413_end_mask_0, squeeze_mask = var_8413_squeeze_mask_0, x = coreml_update_state_65)[name = string("op_8413_cast_fp16")]; tensor V_cache_31_axes_0 = const()[name = string("V_cache_31_axes_0"), val = tensor([0])]; tensor V_cache_31_cast_fp16 = expand_dims(axes = V_cache_31_axes_0, x = var_8413_cast_fp16)[name = string("V_cache_31_cast_fp16")]; bool k_increment_31_transpose_x_0 = const()[name = string("k_increment_31_transpose_x_0"), val = bool(false)]; bool k_increment_31_transpose_y_0 = const()[name = string("k_increment_31_transpose_y_0"), val = bool(false)]; tensor k_increment_31 = matmul(transpose_x = k_increment_31_transpose_x_0, transpose_y = k_increment_31_transpose_y_0, x = update_mask, y = k_95)[name = string("k_increment_31")]; bool v_increment_31_transpose_x_1 = const()[name = string("v_increment_31_transpose_x_1"), val = bool(false)]; bool v_increment_31_transpose_y_1 = const()[name = string("v_increment_31_transpose_y_1"), val = bool(true)]; tensor v_increment_31 = matmul(transpose_x = v_increment_31_transpose_x_1, transpose_y = v_increment_31_transpose_y_1, x = update_mask, y = var_8379)[name = string("v_increment_31")]; tensor var_8431_cast_fp16 = mul(x = K_cache_31_cast_fp16, y = var_1125_cast_fp16)[name = string("op_8431_cast_fp16")]; tensor K_new_31_cast_fp16 = add(x = var_8431_cast_fp16, y = k_increment_31)[name = string("K_new_31_cast_fp16")]; tensor var_8437_cast_fp16 = mul(x = V_cache_31_cast_fp16, y = var_1125_cast_fp16)[name = string("op_8437_cast_fp16")]; tensor V_new_31_cast_fp16 = add(x = var_8437_cast_fp16, y = v_increment_31)[name = string("V_new_31_cast_fp16")]; tensor var_8441_axes_0 = const()[name = string("op_8441_axes_0"), val = tensor([0])]; tensor var_8441_cast_fp16 = squeeze(axes = var_8441_axes_0, x = K_new_31_cast_fp16)[name = string("op_8441_cast_fp16")]; tensor concat_60 = const()[name = string("concat_60"), val = tensor([15, 0, 0, 0])]; tensor concat_61 = const()[name = string("concat_61"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_31_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_31_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_31_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_60, begin_mask = kv_cache_0_internal_tensor_assign_31_begin_mask_0, end = concat_61, end_mask = kv_cache_0_internal_tensor_assign_31_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_31_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_31_stride_0, update = var_8441_cast_fp16, x = coreml_update_state_65)[name = string("kv_cache_0_internal_tensor_assign_31_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_31_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_66_write_state")]; tensor coreml_update_state_66 = read_state(input = kv_cache_0)[name = string("coreml_update_state_66")]; tensor var_8448_axes_0 = const()[name = string("op_8448_axes_0"), val = tensor([0])]; tensor var_8448_cast_fp16 = squeeze(axes = var_8448_axes_0, x = V_new_31_cast_fp16)[name = string("op_8448_cast_fp16")]; tensor concat_62 = const()[name = string("concat_62"), val = tensor([33, 0, 0, 0])]; tensor concat_63 = const()[name = string("concat_63"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_32_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_32_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_32_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_62, begin_mask = kv_cache_0_internal_tensor_assign_32_begin_mask_0, end = concat_63, end_mask = kv_cache_0_internal_tensor_assign_32_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_32_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_32_stride_0, update = var_8448_cast_fp16, x = coreml_update_state_66)[name = string("kv_cache_0_internal_tensor_assign_32_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_32_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_67_write_state")]; tensor coreml_update_state_67 = read_state(input = kv_cache_0)[name = string("coreml_update_state_67")]; tensor hidden_states_121_axes_0 = const()[name = string("hidden_states_121_axes_0"), val = tensor([2])]; tensor hidden_states_121_cast_fp16 = expand_dims(axes = hidden_states_121_axes_0, x = K_new_31_cast_fp16)[name = string("hidden_states_121_cast_fp16")]; tensor var_8461 = const()[name = string("op_8461"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_123_cast_fp16 = tile(reps = var_8461, x = hidden_states_121_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; tensor var_8467 = const()[name = string("op_8467"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_31_cast_fp16 = reshape(shape = var_8467, x = hidden_states_123_cast_fp16)[name = string("K_expanded_31_cast_fp16")]; tensor hidden_states_125_axes_0 = const()[name = string("hidden_states_125_axes_0"), val = tensor([2])]; tensor hidden_states_125_cast_fp16 = expand_dims(axes = hidden_states_125_axes_0, x = V_new_31_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; tensor var_8476 = const()[name = string("op_8476"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_127_cast_fp16 = tile(reps = var_8476, x = hidden_states_125_cast_fp16)[name = string("hidden_states_127_cast_fp16")]; tensor var_8482 = const()[name = string("op_8482"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_31_cast_fp16 = reshape(shape = var_8482, x = hidden_states_127_cast_fp16)[name = string("V_expanded_31_cast_fp16")]; bool var_8497_transpose_x_1 = const()[name = string("op_8497_transpose_x_1"), val = bool(false)]; bool var_8497_transpose_y_1 = const()[name = string("op_8497_transpose_y_1"), val = bool(true)]; tensor var_8497_cast_fp16 = matmul(transpose_x = var_8497_transpose_x_1, transpose_y = var_8497_transpose_y_1, x = q_95, y = K_expanded_31_cast_fp16)[name = string("op_8497_cast_fp16")]; fp16 var_8498_to_fp16 = const()[name = string("op_8498_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_91_cast_fp16 = mul(x = var_8497_cast_fp16, y = var_8498_to_fp16)[name = string("attn_weights_91_cast_fp16")]; tensor attn_weights_93_cast_fp16 = add(x = attn_weights_91_cast_fp16, y = causal_mask)[name = string("attn_weights_93_cast_fp16")]; int32 var_8507 = const()[name = string("op_8507"), val = int32(-1)]; tensor var_8509_cast_fp16 = softmax(axis = var_8507, x = attn_weights_93_cast_fp16)[name = string("op_8509_cast_fp16")]; bool var_8525_transpose_x_0 = const()[name = string("op_8525_transpose_x_0"), val = bool(false)]; bool var_8525_transpose_y_0 = const()[name = string("op_8525_transpose_y_0"), val = bool(false)]; tensor var_8525_cast_fp16 = matmul(transpose_x = var_8525_transpose_x_0, transpose_y = var_8525_transpose_y_0, x = var_8509_cast_fp16, y = V_expanded_31_cast_fp16)[name = string("op_8525_cast_fp16")]; tensor var_8535 = const()[name = string("op_8535"), val = tensor([0, 2, 1, 3])]; tensor var_8542 = const()[name = string("op_8542"), val = tensor([1, 32, 1024])]; tensor var_8536 = transpose(perm = var_8535, x = var_8525_cast_fp16)[name = string("transpose_22")]; tensor attn_output_93 = reshape(shape = var_8542, x = var_8536)[name = string("attn_output_93")]; tensor var_8547 = const()[name = string("op_8547"), val = tensor([0, 2, 1])]; tensor squeeze_15_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271189376))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271844800))))[name = string("squeeze_15_quantized")]; string var_8563_pad_type_0 = const()[name = string("op_8563_pad_type_0"), val = string("valid")]; int32 var_8563_groups_0 = const()[name = string("op_8563_groups_0"), val = int32(1)]; tensor var_8563_strides_0 = const()[name = string("op_8563_strides_0"), val = tensor([1])]; tensor var_8563_pad_0 = const()[name = string("op_8563_pad_0"), val = tensor([0, 0])]; tensor var_8563_dilations_0 = const()[name = string("op_8563_dilations_0"), val = tensor([1])]; tensor var_8548 = transpose(perm = var_8547, x = attn_output_93)[name = string("transpose_21")]; tensor var_8563 = conv(dilations = var_8563_dilations_0, groups = var_8563_groups_0, pad = var_8563_pad_0, pad_type = var_8563_pad_type_0, strides = var_8563_strides_0, weight = squeeze_15_quantized, x = var_8548)[name = string("op_8563")]; tensor var_8567 = const()[name = string("op_8567"), val = tensor([0, 2, 1])]; int32 var_8574 = const()[name = string("op_8574"), val = int32(-1)]; fp16 const_218_promoted_to_fp16 = const()[name = string("const_218_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_249 = transpose(perm = var_8567, x = var_8563)[name = string("transpose_20")]; tensor var_8580_cast_fp16 = mul(x = x_249, y = const_218_promoted_to_fp16)[name = string("op_8580_cast_fp16")]; bool input_311_interleave_0 = const()[name = string("input_311_interleave_0"), val = bool(false)]; tensor input_311_cast_fp16 = concat(axis = var_8574, interleave = input_311_interleave_0, values = (x_249, var_8580_cast_fp16))[name = string("input_311_cast_fp16")]; tensor normed_435_axes_0 = const()[name = string("normed_435_axes_0"), val = tensor([-1])]; fp16 var_8572_to_fp16 = const()[name = string("op_8572_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_435_cast_fp16 = layer_norm(axes = normed_435_axes_0, epsilon = var_8572_to_fp16, x = input_311_cast_fp16)[name = string("normed_435_cast_fp16")]; tensor var_8585_split_sizes_0 = const()[name = string("op_8585_split_sizes_0"), val = tensor([640, 640])]; int32 var_8585_axis_0 = const()[name = string("op_8585_axis_0"), val = int32(-1)]; tensor var_8585_cast_fp16_0, tensor var_8585_cast_fp16_1 = split(axis = var_8585_axis_0, split_sizes = var_8585_split_sizes_0, x = normed_435_cast_fp16)[name = string("op_8585_cast_fp16")]; tensor var_8589_to_fp16 = const()[name = string("op_8589_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271846144)))]; tensor out_187_cast_fp16 = mul(x = var_8585_cast_fp16_0, y = var_8589_to_fp16)[name = string("out_187_cast_fp16")]; tensor x_251_cast_fp16 = add(x = x_241_cast_fp16, y = out_187_cast_fp16)[name = string("x_251_cast_fp16")]; int32 var_8603 = const()[name = string("op_8603"), val = int32(-1)]; fp16 const_220_promoted_to_fp16 = const()[name = string("const_220_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8609_cast_fp16 = mul(x = x_251_cast_fp16, y = const_220_promoted_to_fp16)[name = string("op_8609_cast_fp16")]; bool input_313_interleave_0 = const()[name = string("input_313_interleave_0"), val = bool(false)]; tensor input_313_cast_fp16 = concat(axis = var_8603, interleave = input_313_interleave_0, values = (x_251_cast_fp16, var_8609_cast_fp16))[name = string("input_313_cast_fp16")]; tensor normed_439_axes_0 = const()[name = string("normed_439_axes_0"), val = tensor([-1])]; fp16 var_8601_to_fp16 = const()[name = string("op_8601_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_439_cast_fp16 = layer_norm(axes = normed_439_axes_0, epsilon = var_8601_to_fp16, x = input_313_cast_fp16)[name = string("normed_439_cast_fp16")]; tensor var_8614_split_sizes_0 = const()[name = string("op_8614_split_sizes_0"), val = tensor([640, 640])]; int32 var_8614_axis_0 = const()[name = string("op_8614_axis_0"), val = int32(-1)]; tensor var_8614_cast_fp16_0, tensor var_8614_cast_fp16_1 = split(axis = var_8614_axis_0, split_sizes = var_8614_split_sizes_0, x = normed_439_cast_fp16)[name = string("op_8614_cast_fp16")]; tensor var_8618_to_fp16 = const()[name = string("op_8618_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271847488)))]; tensor out_189_cast_fp16 = mul(x = var_8614_cast_fp16_0, y = var_8618_to_fp16)[name = string("out_189_cast_fp16")]; tensor var_8632 = const()[name = string("op_8632"), val = tensor([0, 2, 1])]; tensor input_315_axes_0 = const()[name = string("input_315_axes_0"), val = tensor([2])]; tensor var_8633 = transpose(perm = var_8632, x = out_189_cast_fp16)[name = string("transpose_19")]; tensor input_315 = expand_dims(axes = input_315_axes_0, x = var_8633)[name = string("input_315")]; string gate_61_pad_type_0 = const()[name = string("gate_61_pad_type_0"), val = string("valid")]; tensor gate_61_strides_0 = const()[name = string("gate_61_strides_0"), val = tensor([1, 1])]; tensor gate_61_pad_0 = const()[name = string("gate_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_61_dilations_0 = const()[name = string("gate_61_dilations_0"), val = tensor([1, 1])]; int32 gate_61_groups_0 = const()[name = string("gate_61_groups_0"), val = int32(1)]; tensor gate_61 = conv(dilations = gate_61_dilations_0, groups = gate_61_groups_0, pad = gate_61_pad_0, pad_type = gate_61_pad_type_0, strides = gate_61_strides_0, weight = layers_15_mlp_gate_proj_weight_quantized, x = input_315)[name = string("gate_61")]; string up_31_pad_type_0 = const()[name = string("up_31_pad_type_0"), val = string("valid")]; tensor up_31_strides_0 = const()[name = string("up_31_strides_0"), val = tensor([1, 1])]; tensor up_31_pad_0 = const()[name = string("up_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_31_dilations_0 = const()[name = string("up_31_dilations_0"), val = tensor([1, 1])]; int32 up_31_groups_0 = const()[name = string("up_31_groups_0"), val = int32(1)]; tensor up_31 = conv(dilations = up_31_dilations_0, groups = up_31_groups_0, pad = up_31_pad_0, pad_type = up_31_pad_type_0, strides = up_31_strides_0, weight = layers_15_mlp_up_proj_weight_quantized, x = input_315)[name = string("up_31")]; string gate_63_mode_0 = const()[name = string("gate_63_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_63 = gelu(mode = gate_63_mode_0, x = gate_61)[name = string("gate_63")]; tensor input_317 = mul(x = gate_63, y = up_31)[name = string("input_317")]; string var_8671_pad_type_0 = const()[name = string("op_8671_pad_type_0"), val = string("valid")]; tensor var_8671_strides_0 = const()[name = string("op_8671_strides_0"), val = tensor([1, 1])]; tensor var_8671_pad_0 = const()[name = string("op_8671_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8671_dilations_0 = const()[name = string("op_8671_dilations_0"), val = tensor([1, 1])]; int32 var_8671_groups_0 = const()[name = string("op_8671_groups_0"), val = int32(1)]; tensor var_8671 = conv(dilations = var_8671_dilations_0, groups = var_8671_groups_0, pad = var_8671_pad_0, pad_type = var_8671_pad_type_0, strides = var_8671_strides_0, weight = layers_15_mlp_down_proj_weight_quantized, x = input_317)[name = string("op_8671")]; tensor var_8673_axes_0 = const()[name = string("op_8673_axes_0"), val = tensor([2])]; tensor var_8673 = squeeze(axes = var_8673_axes_0, x = var_8671)[name = string("op_8673")]; tensor var_8677 = const()[name = string("op_8677"), val = tensor([0, 2, 1])]; int32 var_8684 = const()[name = string("op_8684"), val = int32(-1)]; fp16 const_222_promoted_to_fp16 = const()[name = string("const_222_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_255 = transpose(perm = var_8677, x = var_8673)[name = string("transpose_18")]; tensor var_8690_cast_fp16 = mul(x = x_255, y = const_222_promoted_to_fp16)[name = string("op_8690_cast_fp16")]; bool input_319_interleave_0 = const()[name = string("input_319_interleave_0"), val = bool(false)]; tensor input_319_cast_fp16 = concat(axis = var_8684, interleave = input_319_interleave_0, values = (x_255, var_8690_cast_fp16))[name = string("input_319_cast_fp16")]; tensor normed_445_axes_0 = const()[name = string("normed_445_axes_0"), val = tensor([-1])]; fp16 var_8682_to_fp16 = const()[name = string("op_8682_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_445_cast_fp16 = layer_norm(axes = normed_445_axes_0, epsilon = var_8682_to_fp16, x = input_319_cast_fp16)[name = string("normed_445_cast_fp16")]; tensor var_8695_split_sizes_0 = const()[name = string("op_8695_split_sizes_0"), val = tensor([640, 640])]; int32 var_8695_axis_0 = const()[name = string("op_8695_axis_0"), val = int32(-1)]; tensor var_8695_cast_fp16_0, tensor var_8695_cast_fp16_1 = split(axis = var_8695_axis_0, split_sizes = var_8695_split_sizes_0, x = normed_445_cast_fp16)[name = string("op_8695_cast_fp16")]; tensor var_8699_to_fp16 = const()[name = string("op_8699_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271848832)))]; tensor out_191_cast_fp16 = mul(x = var_8695_cast_fp16_0, y = var_8699_to_fp16)[name = string("out_191_cast_fp16")]; tensor x_257_cast_fp16 = add(x = x_251_cast_fp16, y = out_191_cast_fp16)[name = string("x_257_cast_fp16")]; int32 var_8713 = const()[name = string("op_8713"), val = int32(-1)]; fp16 const_224_promoted_to_fp16 = const()[name = string("const_224_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8719_cast_fp16 = mul(x = x_257_cast_fp16, y = const_224_promoted_to_fp16)[name = string("op_8719_cast_fp16")]; bool input_321_interleave_0 = const()[name = string("input_321_interleave_0"), val = bool(false)]; tensor input_321_cast_fp16 = concat(axis = var_8713, interleave = input_321_interleave_0, values = (x_257_cast_fp16, var_8719_cast_fp16))[name = string("input_321_cast_fp16")]; tensor normed_449_axes_0 = const()[name = string("normed_449_axes_0"), val = tensor([-1])]; fp16 var_8711_to_fp16 = const()[name = string("op_8711_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_449_cast_fp16 = layer_norm(axes = normed_449_axes_0, epsilon = var_8711_to_fp16, x = input_321_cast_fp16)[name = string("normed_449_cast_fp16")]; tensor var_8724_split_sizes_0 = const()[name = string("op_8724_split_sizes_0"), val = tensor([640, 640])]; int32 var_8724_axis_0 = const()[name = string("op_8724_axis_0"), val = int32(-1)]; tensor var_8724_cast_fp16_0, tensor var_8724_cast_fp16_1 = split(axis = var_8724_axis_0, split_sizes = var_8724_split_sizes_0, x = normed_449_cast_fp16)[name = string("op_8724_cast_fp16")]; tensor var_8728_to_fp16 = const()[name = string("op_8728_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271850176)))]; tensor out_193_cast_fp16 = mul(x = var_8724_cast_fp16_0, y = var_8728_to_fp16)[name = string("out_193_cast_fp16")]; tensor var_8742 = const()[name = string("op_8742"), val = tensor([0, 2, 1])]; tensor input_323_axes_0 = const()[name = string("input_323_axes_0"), val = tensor([2])]; tensor var_8743 = transpose(perm = var_8742, x = out_193_cast_fp16)[name = string("transpose_17")]; tensor input_323 = expand_dims(axes = input_323_axes_0, x = var_8743)[name = string("input_323")]; string var_8756_pad_type_0 = const()[name = string("op_8756_pad_type_0"), val = string("valid")]; tensor var_8756_strides_0 = const()[name = string("op_8756_strides_0"), val = tensor([1, 1])]; tensor var_8756_pad_0 = const()[name = string("op_8756_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8756_dilations_0 = const()[name = string("op_8756_dilations_0"), val = tensor([1, 1])]; int32 var_8756_groups_0 = const()[name = string("op_8756_groups_0"), val = int32(1)]; tensor var_8756 = conv(dilations = var_8756_dilations_0, groups = var_8756_groups_0, pad = var_8756_pad_0, pad_type = var_8756_pad_type_0, strides = var_8756_strides_0, weight = layers_16_self_attn_q_proj_weight_quantized, x = input_323)[name = string("op_8756")]; tensor var_8761 = const()[name = string("op_8761"), val = tensor([1, 4, 256, 32])]; tensor var_8762 = reshape(shape = var_8761, x = var_8756)[name = string("op_8762")]; tensor var_8767 = const()[name = string("op_8767"), val = tensor([0, 1, 3, 2])]; int32 var_8780 = const()[name = string("op_8780"), val = int32(-1)]; fp16 const_226_promoted_to_fp16 = const()[name = string("const_226_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_97 = transpose(perm = var_8767, x = var_8762)[name = string("transpose_16")]; tensor var_8786_cast_fp16 = mul(x = q_97, y = const_226_promoted_to_fp16)[name = string("op_8786_cast_fp16")]; bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; tensor input_325_cast_fp16 = concat(axis = var_8780, interleave = input_325_interleave_0, values = (q_97, var_8786_cast_fp16))[name = string("input_325_cast_fp16")]; tensor normed_455_axes_0 = const()[name = string("normed_455_axes_0"), val = tensor([-1])]; fp16 var_8778_to_fp16 = const()[name = string("op_8778_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_455_cast_fp16 = layer_norm(axes = normed_455_axes_0, epsilon = var_8778_to_fp16, x = input_325_cast_fp16)[name = string("normed_455_cast_fp16")]; tensor var_8791_split_sizes_0 = const()[name = string("op_8791_split_sizes_0"), val = tensor([256, 256])]; int32 var_8791_axis_0 = const()[name = string("op_8791_axis_0"), val = int32(-1)]; tensor var_8791_cast_fp16_0, tensor var_8791_cast_fp16_1 = split(axis = var_8791_axis_0, split_sizes = var_8791_split_sizes_0, x = normed_455_cast_fp16)[name = string("op_8791_cast_fp16")]; tensor var_8795_to_fp16 = const()[name = string("op_8795_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271851520)))]; tensor out_195_cast_fp16 = mul(x = var_8791_cast_fp16_0, y = var_8795_to_fp16)[name = string("out_195_cast_fp16")]; string var_8808_pad_type_0 = const()[name = string("op_8808_pad_type_0"), val = string("valid")]; tensor var_8808_strides_0 = const()[name = string("op_8808_strides_0"), val = tensor([1, 1])]; tensor var_8808_pad_0 = const()[name = string("op_8808_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8808_dilations_0 = const()[name = string("op_8808_dilations_0"), val = tensor([1, 1])]; int32 var_8808_groups_0 = const()[name = string("op_8808_groups_0"), val = int32(1)]; tensor var_8808 = conv(dilations = var_8808_dilations_0, groups = var_8808_groups_0, pad = var_8808_pad_0, pad_type = var_8808_pad_type_0, strides = var_8808_strides_0, weight = layers_16_self_attn_k_proj_weight_quantized, x = input_323)[name = string("op_8808")]; tensor var_8813 = const()[name = string("op_8813"), val = tensor([1, 1, 256, 32])]; tensor var_8814 = reshape(shape = var_8813, x = var_8808)[name = string("op_8814")]; tensor var_8819 = const()[name = string("op_8819"), val = tensor([0, 1, 3, 2])]; int32 var_8832 = const()[name = string("op_8832"), val = int32(-1)]; fp16 const_228_promoted_to_fp16 = const()[name = string("const_228_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_97 = transpose(perm = var_8819, x = var_8814)[name = string("transpose_15")]; tensor var_8838_cast_fp16 = mul(x = k_97, y = const_228_promoted_to_fp16)[name = string("op_8838_cast_fp16")]; bool input_327_interleave_0 = const()[name = string("input_327_interleave_0"), val = bool(false)]; tensor input_327_cast_fp16 = concat(axis = var_8832, interleave = input_327_interleave_0, values = (k_97, var_8838_cast_fp16))[name = string("input_327_cast_fp16")]; tensor normed_459_axes_0 = const()[name = string("normed_459_axes_0"), val = tensor([-1])]; fp16 var_8830_to_fp16 = const()[name = string("op_8830_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_459_cast_fp16 = layer_norm(axes = normed_459_axes_0, epsilon = var_8830_to_fp16, x = input_327_cast_fp16)[name = string("normed_459_cast_fp16")]; tensor var_8843_split_sizes_0 = const()[name = string("op_8843_split_sizes_0"), val = tensor([256, 256])]; int32 var_8843_axis_0 = const()[name = string("op_8843_axis_0"), val = int32(-1)]; tensor var_8843_cast_fp16_0, tensor var_8843_cast_fp16_1 = split(axis = var_8843_axis_0, split_sizes = var_8843_split_sizes_0, x = normed_459_cast_fp16)[name = string("op_8843_cast_fp16")]; tensor var_8847_to_fp16 = const()[name = string("op_8847_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271852096)))]; tensor out_197_cast_fp16 = mul(x = var_8843_cast_fp16_0, y = var_8847_to_fp16)[name = string("out_197_cast_fp16")]; string var_8860_pad_type_0 = const()[name = string("op_8860_pad_type_0"), val = string("valid")]; tensor var_8860_strides_0 = const()[name = string("op_8860_strides_0"), val = tensor([1, 1])]; tensor var_8860_pad_0 = const()[name = string("op_8860_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8860_dilations_0 = const()[name = string("op_8860_dilations_0"), val = tensor([1, 1])]; int32 var_8860_groups_0 = const()[name = string("op_8860_groups_0"), val = int32(1)]; tensor var_8860 = conv(dilations = var_8860_dilations_0, groups = var_8860_groups_0, pad = var_8860_pad_0, pad_type = var_8860_pad_type_0, strides = var_8860_strides_0, weight = layers_16_self_attn_v_proj_weight_quantized, x = input_323)[name = string("op_8860")]; tensor var_8865 = const()[name = string("op_8865"), val = tensor([1, 1, 256, 32])]; tensor var_8866 = reshape(shape = var_8865, x = var_8860)[name = string("op_8866")]; tensor var_8873 = mul(x = out_195_cast_fp16, y = cos_1)[name = string("op_8873")]; tensor var_8874_split_sizes_0 = const()[name = string("op_8874_split_sizes_0"), val = tensor([128, 128])]; int32 var_8874_axis_0 = const()[name = string("op_8874_axis_0"), val = int32(-1)]; tensor var_8874_0, tensor var_8874_1 = split(axis = var_8874_axis_0, split_sizes = var_8874_split_sizes_0, x = out_195_cast_fp16)[name = string("op_8874")]; fp16 const_230_promoted = const()[name = string("const_230_promoted"), val = fp16(-0x1p+0)]; tensor var_8876 = mul(x = var_8874_1, y = const_230_promoted)[name = string("op_8876")]; int32 var_8878 = const()[name = string("op_8878"), val = int32(-1)]; bool var_8879_interleave_0 = const()[name = string("op_8879_interleave_0"), val = bool(false)]; tensor var_8879 = concat(axis = var_8878, interleave = var_8879_interleave_0, values = (var_8876, var_8874_0))[name = string("op_8879")]; tensor var_8880 = mul(x = var_8879, y = sin_1)[name = string("op_8880")]; tensor q_101 = add(x = var_8873, y = var_8880)[name = string("q_101")]; tensor var_8883 = mul(x = out_197_cast_fp16, y = cos_1)[name = string("op_8883")]; tensor var_8884_split_sizes_0 = const()[name = string("op_8884_split_sizes_0"), val = tensor([128, 128])]; int32 var_8884_axis_0 = const()[name = string("op_8884_axis_0"), val = int32(-1)]; tensor var_8884_0, tensor var_8884_1 = split(axis = var_8884_axis_0, split_sizes = var_8884_split_sizes_0, x = out_197_cast_fp16)[name = string("op_8884")]; fp16 const_231_promoted = const()[name = string("const_231_promoted"), val = fp16(-0x1p+0)]; tensor var_8886 = mul(x = var_8884_1, y = const_231_promoted)[name = string("op_8886")]; int32 var_8888 = const()[name = string("op_8888"), val = int32(-1)]; bool var_8889_interleave_0 = const()[name = string("op_8889_interleave_0"), val = bool(false)]; tensor var_8889 = concat(axis = var_8888, interleave = var_8889_interleave_0, values = (var_8886, var_8884_0))[name = string("op_8889")]; tensor var_8890 = mul(x = var_8889, y = sin_1)[name = string("op_8890")]; tensor k_101 = add(x = var_8883, y = var_8890)[name = string("k_101")]; tensor var_8895_begin_0 = const()[name = string("op_8895_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_8895_end_0 = const()[name = string("op_8895_end_0"), val = tensor([17, 1, 2048, 256])]; tensor var_8895_end_mask_0 = const()[name = string("op_8895_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8895_squeeze_mask_0 = const()[name = string("op_8895_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_8895_cast_fp16 = slice_by_index(begin = var_8895_begin_0, end = var_8895_end_0, end_mask = var_8895_end_mask_0, squeeze_mask = var_8895_squeeze_mask_0, x = coreml_update_state_67)[name = string("op_8895_cast_fp16")]; tensor K_cache_33_axes_0 = const()[name = string("K_cache_33_axes_0"), val = tensor([0])]; tensor K_cache_33_cast_fp16 = expand_dims(axes = K_cache_33_axes_0, x = var_8895_cast_fp16)[name = string("K_cache_33_cast_fp16")]; tensor var_8900_begin_0 = const()[name = string("op_8900_begin_0"), val = tensor([34, 0, 0, 0])]; tensor var_8900_end_0 = const()[name = string("op_8900_end_0"), val = tensor([35, 1, 2048, 256])]; tensor var_8900_end_mask_0 = const()[name = string("op_8900_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8900_squeeze_mask_0 = const()[name = string("op_8900_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_8900_cast_fp16 = slice_by_index(begin = var_8900_begin_0, end = var_8900_end_0, end_mask = var_8900_end_mask_0, squeeze_mask = var_8900_squeeze_mask_0, x = coreml_update_state_67)[name = string("op_8900_cast_fp16")]; tensor V_cache_33_axes_0 = const()[name = string("V_cache_33_axes_0"), val = tensor([0])]; tensor V_cache_33_cast_fp16 = expand_dims(axes = V_cache_33_axes_0, x = var_8900_cast_fp16)[name = string("V_cache_33_cast_fp16")]; bool k_increment_33_transpose_x_0 = const()[name = string("k_increment_33_transpose_x_0"), val = bool(false)]; bool k_increment_33_transpose_y_0 = const()[name = string("k_increment_33_transpose_y_0"), val = bool(false)]; tensor k_increment_33 = matmul(transpose_x = k_increment_33_transpose_x_0, transpose_y = k_increment_33_transpose_y_0, x = update_mask, y = k_101)[name = string("k_increment_33")]; bool v_increment_33_transpose_x_1 = const()[name = string("v_increment_33_transpose_x_1"), val = bool(false)]; bool v_increment_33_transpose_y_1 = const()[name = string("v_increment_33_transpose_y_1"), val = bool(true)]; tensor v_increment_33 = matmul(transpose_x = v_increment_33_transpose_x_1, transpose_y = v_increment_33_transpose_y_1, x = update_mask, y = var_8866)[name = string("v_increment_33")]; tensor var_8918_cast_fp16 = mul(x = K_cache_33_cast_fp16, y = var_1125_cast_fp16)[name = string("op_8918_cast_fp16")]; tensor K_new_33_cast_fp16 = add(x = var_8918_cast_fp16, y = k_increment_33)[name = string("K_new_33_cast_fp16")]; tensor var_8924_cast_fp16 = mul(x = V_cache_33_cast_fp16, y = var_1125_cast_fp16)[name = string("op_8924_cast_fp16")]; tensor V_new_33_cast_fp16 = add(x = var_8924_cast_fp16, y = v_increment_33)[name = string("V_new_33_cast_fp16")]; tensor var_8928_axes_0 = const()[name = string("op_8928_axes_0"), val = tensor([0])]; tensor var_8928_cast_fp16 = squeeze(axes = var_8928_axes_0, x = K_new_33_cast_fp16)[name = string("op_8928_cast_fp16")]; tensor concat_64 = const()[name = string("concat_64"), val = tensor([16, 0, 0, 0])]; tensor concat_65 = const()[name = string("concat_65"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_33_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_33_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_33_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_64, begin_mask = kv_cache_0_internal_tensor_assign_33_begin_mask_0, end = concat_65, end_mask = kv_cache_0_internal_tensor_assign_33_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_33_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_33_stride_0, update = var_8928_cast_fp16, x = coreml_update_state_67)[name = string("kv_cache_0_internal_tensor_assign_33_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_33_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_68_write_state")]; tensor coreml_update_state_68 = read_state(input = kv_cache_0)[name = string("coreml_update_state_68")]; tensor var_8935_axes_0 = const()[name = string("op_8935_axes_0"), val = tensor([0])]; tensor var_8935_cast_fp16 = squeeze(axes = var_8935_axes_0, x = V_new_33_cast_fp16)[name = string("op_8935_cast_fp16")]; tensor concat_66 = const()[name = string("concat_66"), val = tensor([34, 0, 0, 0])]; tensor concat_67 = const()[name = string("concat_67"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_34_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_34_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_34_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_66, begin_mask = kv_cache_0_internal_tensor_assign_34_begin_mask_0, end = concat_67, end_mask = kv_cache_0_internal_tensor_assign_34_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_34_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_34_stride_0, update = var_8935_cast_fp16, x = coreml_update_state_68)[name = string("kv_cache_0_internal_tensor_assign_34_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_34_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_69_write_state")]; tensor coreml_update_state_69 = read_state(input = kv_cache_0)[name = string("coreml_update_state_69")]; tensor hidden_states_129_axes_0 = const()[name = string("hidden_states_129_axes_0"), val = tensor([2])]; tensor hidden_states_129_cast_fp16 = expand_dims(axes = hidden_states_129_axes_0, x = K_new_33_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; tensor var_8948 = const()[name = string("op_8948"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_131_cast_fp16 = tile(reps = var_8948, x = hidden_states_129_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; tensor var_8954 = const()[name = string("op_8954"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_33_cast_fp16 = reshape(shape = var_8954, x = hidden_states_131_cast_fp16)[name = string("K_expanded_33_cast_fp16")]; tensor hidden_states_133_axes_0 = const()[name = string("hidden_states_133_axes_0"), val = tensor([2])]; tensor hidden_states_133_cast_fp16 = expand_dims(axes = hidden_states_133_axes_0, x = V_new_33_cast_fp16)[name = string("hidden_states_133_cast_fp16")]; tensor var_8963 = const()[name = string("op_8963"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_135_cast_fp16 = tile(reps = var_8963, x = hidden_states_133_cast_fp16)[name = string("hidden_states_135_cast_fp16")]; tensor var_8969 = const()[name = string("op_8969"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_33_cast_fp16 = reshape(shape = var_8969, x = hidden_states_135_cast_fp16)[name = string("V_expanded_33_cast_fp16")]; bool var_8984_transpose_x_1 = const()[name = string("op_8984_transpose_x_1"), val = bool(false)]; bool var_8984_transpose_y_1 = const()[name = string("op_8984_transpose_y_1"), val = bool(true)]; tensor var_8984_cast_fp16 = matmul(transpose_x = var_8984_transpose_x_1, transpose_y = var_8984_transpose_y_1, x = q_101, y = K_expanded_33_cast_fp16)[name = string("op_8984_cast_fp16")]; fp16 var_8985_to_fp16 = const()[name = string("op_8985_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_97_cast_fp16 = mul(x = var_8984_cast_fp16, y = var_8985_to_fp16)[name = string("attn_weights_97_cast_fp16")]; tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = causal_mask)[name = string("attn_weights_99_cast_fp16")]; int32 var_8994 = const()[name = string("op_8994"), val = int32(-1)]; tensor var_8996_cast_fp16 = softmax(axis = var_8994, x = attn_weights_99_cast_fp16)[name = string("op_8996_cast_fp16")]; bool var_9012_transpose_x_0 = const()[name = string("op_9012_transpose_x_0"), val = bool(false)]; bool var_9012_transpose_y_0 = const()[name = string("op_9012_transpose_y_0"), val = bool(false)]; tensor var_9012_cast_fp16 = matmul(transpose_x = var_9012_transpose_x_0, transpose_y = var_9012_transpose_y_0, x = var_8996_cast_fp16, y = V_expanded_33_cast_fp16)[name = string("op_9012_cast_fp16")]; tensor var_9022 = const()[name = string("op_9022"), val = tensor([0, 2, 1, 3])]; tensor var_9029 = const()[name = string("op_9029"), val = tensor([1, 32, 1024])]; tensor var_9023 = transpose(perm = var_9022, x = var_9012_cast_fp16)[name = string("transpose_14")]; tensor attn_output_99 = reshape(shape = var_9029, x = var_9023)[name = string("attn_output_99")]; tensor var_9034 = const()[name = string("op_9034"), val = tensor([0, 2, 1])]; tensor squeeze_16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271852672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272508096))))[name = string("squeeze_16_quantized")]; string var_9050_pad_type_0 = const()[name = string("op_9050_pad_type_0"), val = string("valid")]; int32 var_9050_groups_0 = const()[name = string("op_9050_groups_0"), val = int32(1)]; tensor var_9050_strides_0 = const()[name = string("op_9050_strides_0"), val = tensor([1])]; tensor var_9050_pad_0 = const()[name = string("op_9050_pad_0"), val = tensor([0, 0])]; tensor var_9050_dilations_0 = const()[name = string("op_9050_dilations_0"), val = tensor([1])]; tensor var_9035 = transpose(perm = var_9034, x = attn_output_99)[name = string("transpose_13")]; tensor var_9050 = conv(dilations = var_9050_dilations_0, groups = var_9050_groups_0, pad = var_9050_pad_0, pad_type = var_9050_pad_type_0, strides = var_9050_strides_0, weight = squeeze_16_quantized, x = var_9035)[name = string("op_9050")]; tensor var_9054 = const()[name = string("op_9054"), val = tensor([0, 2, 1])]; int32 var_9061 = const()[name = string("op_9061"), val = int32(-1)]; fp16 const_232_promoted_to_fp16 = const()[name = string("const_232_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_265 = transpose(perm = var_9054, x = var_9050)[name = string("transpose_12")]; tensor var_9067_cast_fp16 = mul(x = x_265, y = const_232_promoted_to_fp16)[name = string("op_9067_cast_fp16")]; bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; tensor input_331_cast_fp16 = concat(axis = var_9061, interleave = input_331_interleave_0, values = (x_265, var_9067_cast_fp16))[name = string("input_331_cast_fp16")]; tensor normed_463_axes_0 = const()[name = string("normed_463_axes_0"), val = tensor([-1])]; fp16 var_9059_to_fp16 = const()[name = string("op_9059_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_463_cast_fp16 = layer_norm(axes = normed_463_axes_0, epsilon = var_9059_to_fp16, x = input_331_cast_fp16)[name = string("normed_463_cast_fp16")]; tensor var_9072_split_sizes_0 = const()[name = string("op_9072_split_sizes_0"), val = tensor([640, 640])]; int32 var_9072_axis_0 = const()[name = string("op_9072_axis_0"), val = int32(-1)]; tensor var_9072_cast_fp16_0, tensor var_9072_cast_fp16_1 = split(axis = var_9072_axis_0, split_sizes = var_9072_split_sizes_0, x = normed_463_cast_fp16)[name = string("op_9072_cast_fp16")]; tensor var_9076_to_fp16 = const()[name = string("op_9076_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272509440)))]; tensor out_199_cast_fp16 = mul(x = var_9072_cast_fp16_0, y = var_9076_to_fp16)[name = string("out_199_cast_fp16")]; tensor x_267_cast_fp16 = add(x = x_257_cast_fp16, y = out_199_cast_fp16)[name = string("x_267_cast_fp16")]; int32 var_9090 = const()[name = string("op_9090"), val = int32(-1)]; fp16 const_234_promoted_to_fp16 = const()[name = string("const_234_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9096_cast_fp16 = mul(x = x_267_cast_fp16, y = const_234_promoted_to_fp16)[name = string("op_9096_cast_fp16")]; bool input_333_interleave_0 = const()[name = string("input_333_interleave_0"), val = bool(false)]; tensor input_333_cast_fp16 = concat(axis = var_9090, interleave = input_333_interleave_0, values = (x_267_cast_fp16, var_9096_cast_fp16))[name = string("input_333_cast_fp16")]; tensor normed_467_axes_0 = const()[name = string("normed_467_axes_0"), val = tensor([-1])]; fp16 var_9088_to_fp16 = const()[name = string("op_9088_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_467_cast_fp16 = layer_norm(axes = normed_467_axes_0, epsilon = var_9088_to_fp16, x = input_333_cast_fp16)[name = string("normed_467_cast_fp16")]; tensor var_9101_split_sizes_0 = const()[name = string("op_9101_split_sizes_0"), val = tensor([640, 640])]; int32 var_9101_axis_0 = const()[name = string("op_9101_axis_0"), val = int32(-1)]; tensor var_9101_cast_fp16_0, tensor var_9101_cast_fp16_1 = split(axis = var_9101_axis_0, split_sizes = var_9101_split_sizes_0, x = normed_467_cast_fp16)[name = string("op_9101_cast_fp16")]; tensor var_9105_to_fp16 = const()[name = string("op_9105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272510784)))]; tensor out_201_cast_fp16 = mul(x = var_9101_cast_fp16_0, y = var_9105_to_fp16)[name = string("out_201_cast_fp16")]; tensor var_9119 = const()[name = string("op_9119"), val = tensor([0, 2, 1])]; tensor input_335_axes_0 = const()[name = string("input_335_axes_0"), val = tensor([2])]; tensor var_9120 = transpose(perm = var_9119, x = out_201_cast_fp16)[name = string("transpose_11")]; tensor input_335 = expand_dims(axes = input_335_axes_0, x = var_9120)[name = string("input_335")]; string gate_65_pad_type_0 = const()[name = string("gate_65_pad_type_0"), val = string("valid")]; tensor gate_65_strides_0 = const()[name = string("gate_65_strides_0"), val = tensor([1, 1])]; tensor gate_65_pad_0 = const()[name = string("gate_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_65_dilations_0 = const()[name = string("gate_65_dilations_0"), val = tensor([1, 1])]; int32 gate_65_groups_0 = const()[name = string("gate_65_groups_0"), val = int32(1)]; tensor gate_65 = conv(dilations = gate_65_dilations_0, groups = gate_65_groups_0, pad = gate_65_pad_0, pad_type = gate_65_pad_type_0, strides = gate_65_strides_0, weight = layers_16_mlp_gate_proj_weight_quantized, x = input_335)[name = string("gate_65")]; string up_33_pad_type_0 = const()[name = string("up_33_pad_type_0"), val = string("valid")]; tensor up_33_strides_0 = const()[name = string("up_33_strides_0"), val = tensor([1, 1])]; tensor up_33_pad_0 = const()[name = string("up_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_33_dilations_0 = const()[name = string("up_33_dilations_0"), val = tensor([1, 1])]; int32 up_33_groups_0 = const()[name = string("up_33_groups_0"), val = int32(1)]; tensor up_33 = conv(dilations = up_33_dilations_0, groups = up_33_groups_0, pad = up_33_pad_0, pad_type = up_33_pad_type_0, strides = up_33_strides_0, weight = layers_16_mlp_up_proj_weight_quantized, x = input_335)[name = string("up_33")]; string gate_67_mode_0 = const()[name = string("gate_67_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_67 = gelu(mode = gate_67_mode_0, x = gate_65)[name = string("gate_67")]; tensor input_337 = mul(x = gate_67, y = up_33)[name = string("input_337")]; string var_9158_pad_type_0 = const()[name = string("op_9158_pad_type_0"), val = string("valid")]; tensor var_9158_strides_0 = const()[name = string("op_9158_strides_0"), val = tensor([1, 1])]; tensor var_9158_pad_0 = const()[name = string("op_9158_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9158_dilations_0 = const()[name = string("op_9158_dilations_0"), val = tensor([1, 1])]; int32 var_9158_groups_0 = const()[name = string("op_9158_groups_0"), val = int32(1)]; tensor var_9158 = conv(dilations = var_9158_dilations_0, groups = var_9158_groups_0, pad = var_9158_pad_0, pad_type = var_9158_pad_type_0, strides = var_9158_strides_0, weight = layers_16_mlp_down_proj_weight_quantized, x = input_337)[name = string("op_9158")]; tensor var_9160_axes_0 = const()[name = string("op_9160_axes_0"), val = tensor([2])]; tensor var_9160 = squeeze(axes = var_9160_axes_0, x = var_9158)[name = string("op_9160")]; tensor var_9164 = const()[name = string("op_9164"), val = tensor([0, 2, 1])]; int32 var_9171 = const()[name = string("op_9171"), val = int32(-1)]; fp16 const_236_promoted_to_fp16 = const()[name = string("const_236_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_271 = transpose(perm = var_9164, x = var_9160)[name = string("transpose_10")]; tensor var_9177_cast_fp16 = mul(x = x_271, y = const_236_promoted_to_fp16)[name = string("op_9177_cast_fp16")]; bool input_339_interleave_0 = const()[name = string("input_339_interleave_0"), val = bool(false)]; tensor input_339_cast_fp16 = concat(axis = var_9171, interleave = input_339_interleave_0, values = (x_271, var_9177_cast_fp16))[name = string("input_339_cast_fp16")]; tensor normed_473_axes_0 = const()[name = string("normed_473_axes_0"), val = tensor([-1])]; fp16 var_9169_to_fp16 = const()[name = string("op_9169_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_473_cast_fp16 = layer_norm(axes = normed_473_axes_0, epsilon = var_9169_to_fp16, x = input_339_cast_fp16)[name = string("normed_473_cast_fp16")]; tensor var_9182_split_sizes_0 = const()[name = string("op_9182_split_sizes_0"), val = tensor([640, 640])]; int32 var_9182_axis_0 = const()[name = string("op_9182_axis_0"), val = int32(-1)]; tensor var_9182_cast_fp16_0, tensor var_9182_cast_fp16_1 = split(axis = var_9182_axis_0, split_sizes = var_9182_split_sizes_0, x = normed_473_cast_fp16)[name = string("op_9182_cast_fp16")]; tensor var_9186_to_fp16 = const()[name = string("op_9186_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272512128)))]; tensor out_203_cast_fp16 = mul(x = var_9182_cast_fp16_0, y = var_9186_to_fp16)[name = string("out_203_cast_fp16")]; tensor x_273_cast_fp16 = add(x = x_267_cast_fp16, y = out_203_cast_fp16)[name = string("x_273_cast_fp16")]; int32 var_9200 = const()[name = string("op_9200"), val = int32(-1)]; fp16 const_238_promoted_to_fp16 = const()[name = string("const_238_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9206_cast_fp16 = mul(x = x_273_cast_fp16, y = const_238_promoted_to_fp16)[name = string("op_9206_cast_fp16")]; bool input_341_interleave_0 = const()[name = string("input_341_interleave_0"), val = bool(false)]; tensor input_341_cast_fp16 = concat(axis = var_9200, interleave = input_341_interleave_0, values = (x_273_cast_fp16, var_9206_cast_fp16))[name = string("input_341_cast_fp16")]; tensor normed_477_axes_0 = const()[name = string("normed_477_axes_0"), val = tensor([-1])]; fp16 var_9198_to_fp16 = const()[name = string("op_9198_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_477_cast_fp16 = layer_norm(axes = normed_477_axes_0, epsilon = var_9198_to_fp16, x = input_341_cast_fp16)[name = string("normed_477_cast_fp16")]; tensor var_9211_split_sizes_0 = const()[name = string("op_9211_split_sizes_0"), val = tensor([640, 640])]; int32 var_9211_axis_0 = const()[name = string("op_9211_axis_0"), val = int32(-1)]; tensor var_9211_cast_fp16_0, tensor var_9211_cast_fp16_1 = split(axis = var_9211_axis_0, split_sizes = var_9211_split_sizes_0, x = normed_477_cast_fp16)[name = string("op_9211_cast_fp16")]; tensor var_9215_to_fp16 = const()[name = string("op_9215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272513472)))]; tensor out_205_cast_fp16 = mul(x = var_9211_cast_fp16_0, y = var_9215_to_fp16)[name = string("out_205_cast_fp16")]; tensor var_9229 = const()[name = string("op_9229"), val = tensor([0, 2, 1])]; tensor input_343_axes_0 = const()[name = string("input_343_axes_0"), val = tensor([2])]; tensor var_9230 = transpose(perm = var_9229, x = out_205_cast_fp16)[name = string("transpose_9")]; tensor input_343 = expand_dims(axes = input_343_axes_0, x = var_9230)[name = string("input_343")]; string var_9243_pad_type_0 = const()[name = string("op_9243_pad_type_0"), val = string("valid")]; tensor var_9243_strides_0 = const()[name = string("op_9243_strides_0"), val = tensor([1, 1])]; tensor var_9243_pad_0 = const()[name = string("op_9243_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9243_dilations_0 = const()[name = string("op_9243_dilations_0"), val = tensor([1, 1])]; int32 var_9243_groups_0 = const()[name = string("op_9243_groups_0"), val = int32(1)]; tensor var_9243 = conv(dilations = var_9243_dilations_0, groups = var_9243_groups_0, pad = var_9243_pad_0, pad_type = var_9243_pad_type_0, strides = var_9243_strides_0, weight = layers_17_self_attn_q_proj_weight_quantized, x = input_343)[name = string("op_9243")]; tensor var_9248 = const()[name = string("op_9248"), val = tensor([1, 4, 256, 32])]; tensor var_9249 = reshape(shape = var_9248, x = var_9243)[name = string("op_9249")]; tensor var_9254 = const()[name = string("op_9254"), val = tensor([0, 1, 3, 2])]; int32 var_9267 = const()[name = string("op_9267"), val = int32(-1)]; fp16 const_240_promoted_to_fp16 = const()[name = string("const_240_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor q_103 = transpose(perm = var_9254, x = var_9249)[name = string("transpose_8")]; tensor var_9273_cast_fp16 = mul(x = q_103, y = const_240_promoted_to_fp16)[name = string("op_9273_cast_fp16")]; bool input_345_interleave_0 = const()[name = string("input_345_interleave_0"), val = bool(false)]; tensor input_345_cast_fp16 = concat(axis = var_9267, interleave = input_345_interleave_0, values = (q_103, var_9273_cast_fp16))[name = string("input_345_cast_fp16")]; tensor normed_483_axes_0 = const()[name = string("normed_483_axes_0"), val = tensor([-1])]; fp16 var_9265_to_fp16 = const()[name = string("op_9265_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_483_cast_fp16 = layer_norm(axes = normed_483_axes_0, epsilon = var_9265_to_fp16, x = input_345_cast_fp16)[name = string("normed_483_cast_fp16")]; tensor var_9278_split_sizes_0 = const()[name = string("op_9278_split_sizes_0"), val = tensor([256, 256])]; int32 var_9278_axis_0 = const()[name = string("op_9278_axis_0"), val = int32(-1)]; tensor var_9278_cast_fp16_0, tensor var_9278_cast_fp16_1 = split(axis = var_9278_axis_0, split_sizes = var_9278_split_sizes_0, x = normed_483_cast_fp16)[name = string("op_9278_cast_fp16")]; tensor var_9282_to_fp16 = const()[name = string("op_9282_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272514816)))]; tensor out_207_cast_fp16 = mul(x = var_9278_cast_fp16_0, y = var_9282_to_fp16)[name = string("out_207_cast_fp16")]; string var_9295_pad_type_0 = const()[name = string("op_9295_pad_type_0"), val = string("valid")]; tensor var_9295_strides_0 = const()[name = string("op_9295_strides_0"), val = tensor([1, 1])]; tensor var_9295_pad_0 = const()[name = string("op_9295_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9295_dilations_0 = const()[name = string("op_9295_dilations_0"), val = tensor([1, 1])]; int32 var_9295_groups_0 = const()[name = string("op_9295_groups_0"), val = int32(1)]; tensor var_9295 = conv(dilations = var_9295_dilations_0, groups = var_9295_groups_0, pad = var_9295_pad_0, pad_type = var_9295_pad_type_0, strides = var_9295_strides_0, weight = layers_17_self_attn_k_proj_weight_quantized, x = input_343)[name = string("op_9295")]; tensor var_9300 = const()[name = string("op_9300"), val = tensor([1, 1, 256, 32])]; tensor var_9301 = reshape(shape = var_9300, x = var_9295)[name = string("op_9301")]; tensor var_9306 = const()[name = string("op_9306"), val = tensor([0, 1, 3, 2])]; int32 var_9319 = const()[name = string("op_9319"), val = int32(-1)]; fp16 const_242_promoted_to_fp16 = const()[name = string("const_242_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor k_103 = transpose(perm = var_9306, x = var_9301)[name = string("transpose_7")]; tensor var_9325_cast_fp16 = mul(x = k_103, y = const_242_promoted_to_fp16)[name = string("op_9325_cast_fp16")]; bool input_347_interleave_0 = const()[name = string("input_347_interleave_0"), val = bool(false)]; tensor input_347_cast_fp16 = concat(axis = var_9319, interleave = input_347_interleave_0, values = (k_103, var_9325_cast_fp16))[name = string("input_347_cast_fp16")]; tensor normed_487_axes_0 = const()[name = string("normed_487_axes_0"), val = tensor([-1])]; fp16 var_9317_to_fp16 = const()[name = string("op_9317_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_487_cast_fp16 = layer_norm(axes = normed_487_axes_0, epsilon = var_9317_to_fp16, x = input_347_cast_fp16)[name = string("normed_487_cast_fp16")]; tensor var_9330_split_sizes_0 = const()[name = string("op_9330_split_sizes_0"), val = tensor([256, 256])]; int32 var_9330_axis_0 = const()[name = string("op_9330_axis_0"), val = int32(-1)]; tensor var_9330_cast_fp16_0, tensor var_9330_cast_fp16_1 = split(axis = var_9330_axis_0, split_sizes = var_9330_split_sizes_0, x = normed_487_cast_fp16)[name = string("op_9330_cast_fp16")]; tensor var_9334_to_fp16 = const()[name = string("op_9334_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272515392)))]; tensor out_209_cast_fp16 = mul(x = var_9330_cast_fp16_0, y = var_9334_to_fp16)[name = string("out_209_cast_fp16")]; string var_9347_pad_type_0 = const()[name = string("op_9347_pad_type_0"), val = string("valid")]; tensor var_9347_strides_0 = const()[name = string("op_9347_strides_0"), val = tensor([1, 1])]; tensor var_9347_pad_0 = const()[name = string("op_9347_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9347_dilations_0 = const()[name = string("op_9347_dilations_0"), val = tensor([1, 1])]; int32 var_9347_groups_0 = const()[name = string("op_9347_groups_0"), val = int32(1)]; tensor var_9347 = conv(dilations = var_9347_dilations_0, groups = var_9347_groups_0, pad = var_9347_pad_0, pad_type = var_9347_pad_type_0, strides = var_9347_strides_0, weight = layers_17_self_attn_v_proj_weight_quantized, x = input_343)[name = string("op_9347")]; tensor var_9352 = const()[name = string("op_9352"), val = tensor([1, 1, 256, 32])]; tensor var_9353 = reshape(shape = var_9352, x = var_9347)[name = string("op_9353")]; tensor var_9360 = mul(x = out_207_cast_fp16, y = cos)[name = string("op_9360")]; tensor var_9361_split_sizes_0 = const()[name = string("op_9361_split_sizes_0"), val = tensor([128, 128])]; int32 var_9361_axis_0 = const()[name = string("op_9361_axis_0"), val = int32(-1)]; tensor var_9361_0, tensor var_9361_1 = split(axis = var_9361_axis_0, split_sizes = var_9361_split_sizes_0, x = out_207_cast_fp16)[name = string("op_9361")]; fp16 const_244_promoted = const()[name = string("const_244_promoted"), val = fp16(-0x1p+0)]; tensor var_9363 = mul(x = var_9361_1, y = const_244_promoted)[name = string("op_9363")]; int32 var_9365 = const()[name = string("op_9365"), val = int32(-1)]; bool var_9366_interleave_0 = const()[name = string("op_9366_interleave_0"), val = bool(false)]; tensor var_9366 = concat(axis = var_9365, interleave = var_9366_interleave_0, values = (var_9363, var_9361_0))[name = string("op_9366")]; tensor var_9367 = mul(x = var_9366, y = sin)[name = string("op_9367")]; tensor q = add(x = var_9360, y = var_9367)[name = string("q")]; tensor var_9370 = mul(x = out_209_cast_fp16, y = cos)[name = string("op_9370")]; tensor var_9371_split_sizes_0 = const()[name = string("op_9371_split_sizes_0"), val = tensor([128, 128])]; int32 var_9371_axis_0 = const()[name = string("op_9371_axis_0"), val = int32(-1)]; tensor var_9371_0, tensor var_9371_1 = split(axis = var_9371_axis_0, split_sizes = var_9371_split_sizes_0, x = out_209_cast_fp16)[name = string("op_9371")]; fp16 const_245_promoted = const()[name = string("const_245_promoted"), val = fp16(-0x1p+0)]; tensor var_9373 = mul(x = var_9371_1, y = const_245_promoted)[name = string("op_9373")]; int32 var_9375 = const()[name = string("op_9375"), val = int32(-1)]; bool var_9376_interleave_0 = const()[name = string("op_9376_interleave_0"), val = bool(false)]; tensor var_9376 = concat(axis = var_9375, interleave = var_9376_interleave_0, values = (var_9373, var_9371_0))[name = string("op_9376")]; tensor var_9377 = mul(x = var_9376, y = sin)[name = string("op_9377")]; tensor k = add(x = var_9370, y = var_9377)[name = string("k")]; tensor var_9382_begin_0 = const()[name = string("op_9382_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_9382_end_0 = const()[name = string("op_9382_end_0"), val = tensor([18, 1, 2048, 256])]; tensor var_9382_end_mask_0 = const()[name = string("op_9382_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9382_squeeze_mask_0 = const()[name = string("op_9382_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_9382_cast_fp16 = slice_by_index(begin = var_9382_begin_0, end = var_9382_end_0, end_mask = var_9382_end_mask_0, squeeze_mask = var_9382_squeeze_mask_0, x = coreml_update_state_69)[name = string("op_9382_cast_fp16")]; tensor K_cache_axes_0 = const()[name = string("K_cache_axes_0"), val = tensor([0])]; tensor K_cache_cast_fp16 = expand_dims(axes = K_cache_axes_0, x = var_9382_cast_fp16)[name = string("K_cache_cast_fp16")]; tensor var_9387_begin_0 = const()[name = string("op_9387_begin_0"), val = tensor([35, 0, 0, 0])]; tensor var_9387_end_0 = const()[name = string("op_9387_end_0"), val = tensor([36, 1, 2048, 256])]; tensor var_9387_end_mask_0 = const()[name = string("op_9387_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9387_squeeze_mask_0 = const()[name = string("op_9387_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_9387_cast_fp16 = slice_by_index(begin = var_9387_begin_0, end = var_9387_end_0, end_mask = var_9387_end_mask_0, squeeze_mask = var_9387_squeeze_mask_0, x = coreml_update_state_69)[name = string("op_9387_cast_fp16")]; tensor V_cache_axes_0 = const()[name = string("V_cache_axes_0"), val = tensor([0])]; tensor V_cache_cast_fp16 = expand_dims(axes = V_cache_axes_0, x = var_9387_cast_fp16)[name = string("V_cache_cast_fp16")]; bool k_increment_transpose_x_0 = const()[name = string("k_increment_transpose_x_0"), val = bool(false)]; bool k_increment_transpose_y_0 = const()[name = string("k_increment_transpose_y_0"), val = bool(false)]; tensor k_increment = matmul(transpose_x = k_increment_transpose_x_0, transpose_y = k_increment_transpose_y_0, x = update_mask, y = k)[name = string("k_increment")]; bool v_increment_transpose_x_1 = const()[name = string("v_increment_transpose_x_1"), val = bool(false)]; bool v_increment_transpose_y_1 = const()[name = string("v_increment_transpose_y_1"), val = bool(true)]; tensor v_increment = matmul(transpose_x = v_increment_transpose_x_1, transpose_y = v_increment_transpose_y_1, x = update_mask, y = var_9353)[name = string("v_increment")]; tensor var_9405_cast_fp16 = mul(x = K_cache_cast_fp16, y = var_1125_cast_fp16)[name = string("op_9405_cast_fp16")]; tensor K_new_cast_fp16 = add(x = var_9405_cast_fp16, y = k_increment)[name = string("K_new_cast_fp16")]; tensor var_9411_cast_fp16 = mul(x = V_cache_cast_fp16, y = var_1125_cast_fp16)[name = string("op_9411_cast_fp16")]; tensor V_new_cast_fp16 = add(x = var_9411_cast_fp16, y = v_increment)[name = string("V_new_cast_fp16")]; tensor var_9415_axes_0 = const()[name = string("op_9415_axes_0"), val = tensor([0])]; tensor var_9415_cast_fp16 = squeeze(axes = var_9415_axes_0, x = K_new_cast_fp16)[name = string("op_9415_cast_fp16")]; tensor concat_68 = const()[name = string("concat_68"), val = tensor([17, 0, 0, 0])]; tensor concat_69 = const()[name = string("concat_69"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_35_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_35_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_35_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_68, begin_mask = kv_cache_0_internal_tensor_assign_35_begin_mask_0, end = concat_69, end_mask = kv_cache_0_internal_tensor_assign_35_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_35_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_35_stride_0, update = var_9415_cast_fp16, x = coreml_update_state_69)[name = string("kv_cache_0_internal_tensor_assign_35_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_35_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_70_write_state")]; tensor coreml_update_state_70 = read_state(input = kv_cache_0)[name = string("coreml_update_state_70")]; tensor var_9422_axes_0 = const()[name = string("op_9422_axes_0"), val = tensor([0])]; tensor var_9422_cast_fp16 = squeeze(axes = var_9422_axes_0, x = V_new_cast_fp16)[name = string("op_9422_cast_fp16")]; tensor concat_70 = const()[name = string("concat_70"), val = tensor([35, 0, 0, 0])]; tensor concat_71 = const()[name = string("concat_71"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_36_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_36_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_36_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_70, begin_mask = kv_cache_0_internal_tensor_assign_36_begin_mask_0, end = concat_71, end_mask = kv_cache_0_internal_tensor_assign_36_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_36_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_36_stride_0, update = var_9422_cast_fp16, x = coreml_update_state_70)[name = string("kv_cache_0_internal_tensor_assign_36_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_36_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_71_write_state")]; tensor hidden_states_137_axes_0 = const()[name = string("hidden_states_137_axes_0"), val = tensor([2])]; tensor hidden_states_137_cast_fp16 = expand_dims(axes = hidden_states_137_axes_0, x = K_new_cast_fp16)[name = string("hidden_states_137_cast_fp16")]; tensor var_9435 = const()[name = string("op_9435"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_139_cast_fp16 = tile(reps = var_9435, x = hidden_states_137_cast_fp16)[name = string("hidden_states_139_cast_fp16")]; tensor var_9441 = const()[name = string("op_9441"), val = tensor([1, 4, 2048, 256])]; tensor K_expanded_cast_fp16 = reshape(shape = var_9441, x = hidden_states_139_cast_fp16)[name = string("K_expanded_cast_fp16")]; tensor hidden_states_141_axes_0 = const()[name = string("hidden_states_141_axes_0"), val = tensor([2])]; tensor hidden_states_141_cast_fp16 = expand_dims(axes = hidden_states_141_axes_0, x = V_new_cast_fp16)[name = string("hidden_states_141_cast_fp16")]; tensor var_9450 = const()[name = string("op_9450"), val = tensor([1, 1, 4, 1, 1])]; tensor hidden_states_cast_fp16 = tile(reps = var_9450, x = hidden_states_141_cast_fp16)[name = string("hidden_states_cast_fp16")]; tensor var_9456 = const()[name = string("op_9456"), val = tensor([1, 4, 2048, 256])]; tensor V_expanded_cast_fp16 = reshape(shape = var_9456, x = hidden_states_cast_fp16)[name = string("V_expanded_cast_fp16")]; bool var_9471_transpose_x_1 = const()[name = string("op_9471_transpose_x_1"), val = bool(false)]; bool var_9471_transpose_y_1 = const()[name = string("op_9471_transpose_y_1"), val = bool(true)]; tensor var_9471_cast_fp16 = matmul(transpose_x = var_9471_transpose_x_1, transpose_y = var_9471_transpose_y_1, x = q, y = K_expanded_cast_fp16)[name = string("op_9471_cast_fp16")]; fp16 var_9472_to_fp16 = const()[name = string("op_9472_to_fp16"), val = fp16(0x1p-4)]; tensor attn_weights_103_cast_fp16 = mul(x = var_9471_cast_fp16, y = var_9472_to_fp16)[name = string("attn_weights_103_cast_fp16")]; tensor attn_weights_105_cast_fp16 = add(x = attn_weights_103_cast_fp16, y = causal_mask)[name = string("attn_weights_105_cast_fp16")]; int32 var_9481 = const()[name = string("op_9481"), val = int32(-1)]; tensor var_9483_cast_fp16 = softmax(axis = var_9481, x = attn_weights_105_cast_fp16)[name = string("op_9483_cast_fp16")]; bool var_9499_transpose_x_0 = const()[name = string("op_9499_transpose_x_0"), val = bool(false)]; bool var_9499_transpose_y_0 = const()[name = string("op_9499_transpose_y_0"), val = bool(false)]; tensor var_9499_cast_fp16 = matmul(transpose_x = var_9499_transpose_x_0, transpose_y = var_9499_transpose_y_0, x = var_9483_cast_fp16, y = V_expanded_cast_fp16)[name = string("op_9499_cast_fp16")]; tensor var_9509 = const()[name = string("op_9509"), val = tensor([0, 2, 1, 3])]; tensor var_9516 = const()[name = string("op_9516"), val = tensor([1, 32, 1024])]; tensor var_9510 = transpose(perm = var_9509, x = var_9499_cast_fp16)[name = string("transpose_6")]; tensor attn_output_105 = reshape(shape = var_9516, x = var_9510)[name = string("attn_output_105")]; tensor var_9521 = const()[name = string("op_9521"), val = tensor([0, 2, 1])]; tensor squeeze_17_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272515968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273171392))))[name = string("squeeze_17_quantized")]; string var_9537_pad_type_0 = const()[name = string("op_9537_pad_type_0"), val = string("valid")]; int32 var_9537_groups_0 = const()[name = string("op_9537_groups_0"), val = int32(1)]; tensor var_9537_strides_0 = const()[name = string("op_9537_strides_0"), val = tensor([1])]; tensor var_9537_pad_0 = const()[name = string("op_9537_pad_0"), val = tensor([0, 0])]; tensor var_9537_dilations_0 = const()[name = string("op_9537_dilations_0"), val = tensor([1])]; tensor var_9522 = transpose(perm = var_9521, x = attn_output_105)[name = string("transpose_5")]; tensor var_9537 = conv(dilations = var_9537_dilations_0, groups = var_9537_groups_0, pad = var_9537_pad_0, pad_type = var_9537_pad_type_0, strides = var_9537_strides_0, weight = squeeze_17_quantized, x = var_9522)[name = string("op_9537")]; tensor var_9541 = const()[name = string("op_9541"), val = tensor([0, 2, 1])]; int32 var_9548 = const()[name = string("op_9548"), val = int32(-1)]; fp16 const_246_promoted_to_fp16 = const()[name = string("const_246_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_281 = transpose(perm = var_9541, x = var_9537)[name = string("transpose_4")]; tensor var_9554_cast_fp16 = mul(x = x_281, y = const_246_promoted_to_fp16)[name = string("op_9554_cast_fp16")]; bool input_351_interleave_0 = const()[name = string("input_351_interleave_0"), val = bool(false)]; tensor input_351_cast_fp16 = concat(axis = var_9548, interleave = input_351_interleave_0, values = (x_281, var_9554_cast_fp16))[name = string("input_351_cast_fp16")]; tensor normed_491_axes_0 = const()[name = string("normed_491_axes_0"), val = tensor([-1])]; fp16 var_9546_to_fp16 = const()[name = string("op_9546_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_491_cast_fp16 = layer_norm(axes = normed_491_axes_0, epsilon = var_9546_to_fp16, x = input_351_cast_fp16)[name = string("normed_491_cast_fp16")]; tensor var_9559_split_sizes_0 = const()[name = string("op_9559_split_sizes_0"), val = tensor([640, 640])]; int32 var_9559_axis_0 = const()[name = string("op_9559_axis_0"), val = int32(-1)]; tensor var_9559_cast_fp16_0, tensor var_9559_cast_fp16_1 = split(axis = var_9559_axis_0, split_sizes = var_9559_split_sizes_0, x = normed_491_cast_fp16)[name = string("op_9559_cast_fp16")]; tensor var_9563_to_fp16 = const()[name = string("op_9563_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273172736)))]; tensor out_211_cast_fp16 = mul(x = var_9559_cast_fp16_0, y = var_9563_to_fp16)[name = string("out_211_cast_fp16")]; tensor x_283_cast_fp16 = add(x = x_273_cast_fp16, y = out_211_cast_fp16)[name = string("x_283_cast_fp16")]; int32 var_9577 = const()[name = string("op_9577"), val = int32(-1)]; fp16 const_248_promoted_to_fp16 = const()[name = string("const_248_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9583_cast_fp16 = mul(x = x_283_cast_fp16, y = const_248_promoted_to_fp16)[name = string("op_9583_cast_fp16")]; bool input_353_interleave_0 = const()[name = string("input_353_interleave_0"), val = bool(false)]; tensor input_353_cast_fp16 = concat(axis = var_9577, interleave = input_353_interleave_0, values = (x_283_cast_fp16, var_9583_cast_fp16))[name = string("input_353_cast_fp16")]; tensor normed_495_axes_0 = const()[name = string("normed_495_axes_0"), val = tensor([-1])]; fp16 var_9575_to_fp16 = const()[name = string("op_9575_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_495_cast_fp16 = layer_norm(axes = normed_495_axes_0, epsilon = var_9575_to_fp16, x = input_353_cast_fp16)[name = string("normed_495_cast_fp16")]; tensor var_9588_split_sizes_0 = const()[name = string("op_9588_split_sizes_0"), val = tensor([640, 640])]; int32 var_9588_axis_0 = const()[name = string("op_9588_axis_0"), val = int32(-1)]; tensor var_9588_cast_fp16_0, tensor var_9588_cast_fp16_1 = split(axis = var_9588_axis_0, split_sizes = var_9588_split_sizes_0, x = normed_495_cast_fp16)[name = string("op_9588_cast_fp16")]; tensor var_9592_to_fp16 = const()[name = string("op_9592_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273174080)))]; tensor out_213_cast_fp16 = mul(x = var_9588_cast_fp16_0, y = var_9592_to_fp16)[name = string("out_213_cast_fp16")]; tensor var_9606 = const()[name = string("op_9606"), val = tensor([0, 2, 1])]; tensor input_355_axes_0 = const()[name = string("input_355_axes_0"), val = tensor([2])]; tensor var_9607 = transpose(perm = var_9606, x = out_213_cast_fp16)[name = string("transpose_3")]; tensor input_355 = expand_dims(axes = input_355_axes_0, x = var_9607)[name = string("input_355")]; string gate_69_pad_type_0 = const()[name = string("gate_69_pad_type_0"), val = string("valid")]; tensor gate_69_strides_0 = const()[name = string("gate_69_strides_0"), val = tensor([1, 1])]; tensor gate_69_pad_0 = const()[name = string("gate_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_69_dilations_0 = const()[name = string("gate_69_dilations_0"), val = tensor([1, 1])]; int32 gate_69_groups_0 = const()[name = string("gate_69_groups_0"), val = int32(1)]; tensor gate_69 = conv(dilations = gate_69_dilations_0, groups = gate_69_groups_0, pad = gate_69_pad_0, pad_type = gate_69_pad_type_0, strides = gate_69_strides_0, weight = layers_17_mlp_gate_proj_weight_quantized, x = input_355)[name = string("gate_69")]; string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")]; tensor up_strides_0 = const()[name = string("up_strides_0"), val = tensor([1, 1])]; tensor up_pad_0 = const()[name = string("up_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_dilations_0 = const()[name = string("up_dilations_0"), val = tensor([1, 1])]; int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)]; tensor up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = layers_17_mlp_up_proj_weight_quantized, x = input_355)[name = string("up")]; string gate_mode_0 = const()[name = string("gate_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate = gelu(mode = gate_mode_0, x = gate_69)[name = string("gate")]; tensor input_357 = mul(x = gate, y = up)[name = string("input_357")]; string var_9645_pad_type_0 = const()[name = string("op_9645_pad_type_0"), val = string("valid")]; tensor var_9645_strides_0 = const()[name = string("op_9645_strides_0"), val = tensor([1, 1])]; tensor var_9645_pad_0 = const()[name = string("op_9645_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9645_dilations_0 = const()[name = string("op_9645_dilations_0"), val = tensor([1, 1])]; int32 var_9645_groups_0 = const()[name = string("op_9645_groups_0"), val = int32(1)]; tensor var_9645 = conv(dilations = var_9645_dilations_0, groups = var_9645_groups_0, pad = var_9645_pad_0, pad_type = var_9645_pad_type_0, strides = var_9645_strides_0, weight = layers_17_mlp_down_proj_weight_quantized, x = input_357)[name = string("op_9645")]; tensor var_9647_axes_0 = const()[name = string("op_9647_axes_0"), val = tensor([2])]; tensor var_9647 = squeeze(axes = var_9647_axes_0, x = var_9645)[name = string("op_9647")]; tensor var_9651 = const()[name = string("op_9651"), val = tensor([0, 2, 1])]; int32 var_9658 = const()[name = string("op_9658"), val = int32(-1)]; fp16 const_250_promoted_to_fp16 = const()[name = string("const_250_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_287 = transpose(perm = var_9651, x = var_9647)[name = string("transpose_2")]; tensor var_9664_cast_fp16 = mul(x = x_287, y = const_250_promoted_to_fp16)[name = string("op_9664_cast_fp16")]; bool input_359_interleave_0 = const()[name = string("input_359_interleave_0"), val = bool(false)]; tensor input_359_cast_fp16 = concat(axis = var_9658, interleave = input_359_interleave_0, values = (x_287, var_9664_cast_fp16))[name = string("input_359_cast_fp16")]; tensor normed_501_axes_0 = const()[name = string("normed_501_axes_0"), val = tensor([-1])]; fp16 var_9656_to_fp16 = const()[name = string("op_9656_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_501_cast_fp16 = layer_norm(axes = normed_501_axes_0, epsilon = var_9656_to_fp16, x = input_359_cast_fp16)[name = string("normed_501_cast_fp16")]; tensor var_9669_split_sizes_0 = const()[name = string("op_9669_split_sizes_0"), val = tensor([640, 640])]; int32 var_9669_axis_0 = const()[name = string("op_9669_axis_0"), val = int32(-1)]; tensor var_9669_cast_fp16_0, tensor var_9669_cast_fp16_1 = split(axis = var_9669_axis_0, split_sizes = var_9669_split_sizes_0, x = normed_501_cast_fp16)[name = string("op_9669_cast_fp16")]; tensor var_9673_to_fp16 = const()[name = string("op_9673_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273175424)))]; tensor out_215_cast_fp16 = mul(x = var_9669_cast_fp16_0, y = var_9673_to_fp16)[name = string("out_215_cast_fp16")]; tensor x_289_cast_fp16 = add(x = x_283_cast_fp16, y = out_215_cast_fp16)[name = string("x_289_cast_fp16")]; int32 var_9687 = const()[name = string("op_9687"), val = int32(-1)]; fp16 const_252_promoted_to_fp16 = const()[name = string("const_252_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9693_cast_fp16 = mul(x = x_289_cast_fp16, y = const_252_promoted_to_fp16)[name = string("op_9693_cast_fp16")]; bool input_361_interleave_0 = const()[name = string("input_361_interleave_0"), val = bool(false)]; tensor input_361_cast_fp16 = concat(axis = var_9687, interleave = input_361_interleave_0, values = (x_289_cast_fp16, var_9693_cast_fp16))[name = string("input_361_cast_fp16")]; tensor normed_505_axes_0 = const()[name = string("normed_505_axes_0"), val = tensor([-1])]; fp16 var_9685_to_fp16 = const()[name = string("op_9685_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_505_cast_fp16 = layer_norm(axes = normed_505_axes_0, epsilon = var_9685_to_fp16, x = input_361_cast_fp16)[name = string("normed_505_cast_fp16")]; tensor var_9698_split_sizes_0 = const()[name = string("op_9698_split_sizes_0"), val = tensor([640, 640])]; int32 var_9698_axis_0 = const()[name = string("op_9698_axis_0"), val = int32(-1)]; tensor var_9698_cast_fp16_0, tensor var_9698_cast_fp16_1 = split(axis = var_9698_axis_0, split_sizes = var_9698_split_sizes_0, x = normed_505_cast_fp16)[name = string("op_9698_cast_fp16")]; tensor var_9702_to_fp16 = const()[name = string("op_9702_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273176768)))]; tensor out_cast_fp16 = mul(x = var_9698_cast_fp16_0, y = var_9702_to_fp16)[name = string("out_cast_fp16")]; tensor var_9714_begin_0 = const()[name = string("op_9714_begin_0"), val = tensor([0, -1, 0])]; tensor var_9714_end_0 = const()[name = string("op_9714_end_0"), val = tensor([1, 32, 640])]; tensor var_9714_end_mask_0 = const()[name = string("op_9714_end_mask_0"), val = tensor([true, true, true])]; tensor var_9714_cast_fp16 = slice_by_index(begin = var_9714_begin_0, end = var_9714_end_0, end_mask = var_9714_end_mask_0, x = out_cast_fp16)[name = string("op_9714_cast_fp16")]; tensor var_9728 = const()[name = string("op_9728"), val = tensor([0, 2, 1])]; tensor squeeze_18_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273178112))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440950336))))[name = string("squeeze_18_quantized")]; string var_9744_pad_type_0 = const()[name = string("op_9744_pad_type_0"), val = string("valid")]; int32 var_9744_groups_0 = const()[name = string("op_9744_groups_0"), val = int32(1)]; tensor var_9744_strides_0 = const()[name = string("op_9744_strides_0"), val = tensor([1])]; tensor var_9744_pad_0 = const()[name = string("op_9744_pad_0"), val = tensor([0, 0])]; tensor var_9744_dilations_0 = const()[name = string("op_9744_dilations_0"), val = tensor([1])]; tensor var_9729 = transpose(perm = var_9728, x = var_9714_cast_fp16)[name = string("transpose_1")]; tensor var_9744 = conv(dilations = var_9744_dilations_0, groups = var_9744_groups_0, pad = var_9744_pad_0, pad_type = var_9744_pad_type_0, strides = var_9744_strides_0, weight = squeeze_18_quantized, x = var_9729)[name = string("op_9744")]; tensor var_9748 = const()[name = string("op_9748"), val = tensor([0, 2, 1])]; tensor logits_axes_0 = const()[name = string("logits_axes_0"), val = tensor([0])]; tensor logits_1 = transpose(perm = var_9748, x = var_9744)[name = string("transpose_0")]; tensor logits = squeeze(axes = logits_axes_0, x = logits_1)[name = string("logits")]; int32 var_9753 = const()[name = string("op_9753"), val = int32(-1)]; int32 token_id_axis_0 = const()[name = string("token_id_axis_0"), val = int32(-1)]; bool token_id_keep_dims_0 = const()[name = string("token_id_keep_dims_0"), val = bool(false)]; string token_id_output_dtype_0 = const()[name = string("token_id_output_dtype_0"), val = string("int32")]; tensor token_id = reduce_argmax(axis = token_id_axis_0, keep_dims = token_id_keep_dims_0, output_dtype = token_id_output_dtype_0, x = logits)[name = string("token_id")]; tensor var_9755_axes_0 = const()[name = string("op_9755_axes_0"), val = tensor([-1])]; tensor var_9755 = expand_dims(axes = var_9755_axes_0, x = token_id)[name = string("op_9755")]; bool var_9756_validate_indices_0 = const()[name = string("op_9756_validate_indices_0"), val = bool(false)]; tensor var_9756 = gather_along_axis(axis = var_9753, indices = var_9755, validate_indices = var_9756_validate_indices_0, x = logits)[name = string("op_9756")]; tensor var_9757_axes_0 = const()[name = string("op_9757_axes_0"), val = tensor([-1])]; tensor token_logit = squeeze(axes = var_9757_axes_0, x = var_9756)[name = string("op_9757")]; } -> (token_id, token_logit); }