program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})] { func main(tensor causal_mask, tensor input_ids, state> kv_cache_0, tensor position_ids, tensor update_mask) { tensor sin_full_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262272))))[name = string("sin_full_palettized")]; tensor cos_full_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525568))))[name = string("cos_full_palettized")]; tensor sin_sliding_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(657792))))[name = string("sin_sliding_palettized")]; tensor cos_sliding_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790016))))[name = string("cos_sliding_palettized")]; tensor layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(791104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2364032))))[name = string("layers_0_self_attn_q_proj_weight_palettized")]; tensor layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2366144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2562816))))[name = string("layers_0_self_attn_k_proj_weight_palettized")]; tensor layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2563136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2759808))))[name = string("layers_0_self_attn_v_proj_weight_palettized")]; tensor layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2760128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7478784))))[name = string("layers_0_mlp_gate_proj_weight_palettized")]; tensor layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7484992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12203648))))[name = string("layers_0_mlp_up_proj_weight_palettized")]; tensor layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12209856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16928512))))[name = string("layers_0_mlp_down_proj_weight_palettized")]; tensor layers_0_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16930112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17126784))))[name = string("layers_0_per_layer_input_gate_weight_palettized")]; tensor layers_0_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17127104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17323776))))[name = string("layers_0_per_layer_projection_weight_palettized")]; tensor layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17325376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18898304))))[name = string("layers_1_self_attn_q_proj_weight_palettized")]; tensor layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18900416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19097088))))[name = string("layers_1_self_attn_k_proj_weight_palettized")]; tensor layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19097408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19294080))))[name = string("layers_1_self_attn_v_proj_weight_palettized")]; tensor layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19294400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24013056))))[name = string("layers_1_mlp_gate_proj_weight_palettized")]; tensor layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24019264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28737920))))[name = string("layers_1_mlp_up_proj_weight_palettized")]; tensor layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28744128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33462784))))[name = string("layers_1_mlp_down_proj_weight_palettized")]; tensor layers_1_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33464384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33661056))))[name = string("layers_1_per_layer_input_gate_weight_palettized")]; tensor layers_1_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33661376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33858048))))[name = string("layers_1_per_layer_projection_weight_palettized")]; tensor layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33859648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35432576))))[name = string("layers_2_self_attn_q_proj_weight_palettized")]; tensor layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35434688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35631360))))[name = string("layers_2_self_attn_k_proj_weight_palettized")]; tensor layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35631680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35828352))))[name = string("layers_2_self_attn_v_proj_weight_palettized")]; tensor layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35828672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40547328))))[name = string("layers_2_mlp_gate_proj_weight_palettized")]; tensor layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40553536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45272192))))[name = string("layers_2_mlp_up_proj_weight_palettized")]; tensor layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45278400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49997056))))[name = string("layers_2_mlp_down_proj_weight_palettized")]; tensor layers_2_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49998656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50195328))))[name = string("layers_2_per_layer_input_gate_weight_palettized")]; tensor layers_2_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50195648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50392320))))[name = string("layers_2_per_layer_projection_weight_palettized")]; tensor layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50393920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51966848))))[name = string("layers_3_self_attn_q_proj_weight_palettized")]; tensor layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51968960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52165632))))[name = string("layers_3_self_attn_k_proj_weight_palettized")]; tensor layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52165952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52362624))))[name = string("layers_3_self_attn_v_proj_weight_palettized")]; tensor layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52362944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57081600))))[name = string("layers_3_mlp_gate_proj_weight_palettized")]; tensor layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57087808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61806464))))[name = string("layers_3_mlp_up_proj_weight_palettized")]; tensor layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61812672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66531328))))[name = string("layers_3_mlp_down_proj_weight_palettized")]; tensor layers_3_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66532928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66729600))))[name = string("layers_3_per_layer_input_gate_weight_palettized")]; tensor layers_3_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66729920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66926592))))[name = string("layers_3_per_layer_projection_weight_palettized")]; tensor layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66928192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70073984))))[name = string("layers_4_self_attn_q_proj_weight_palettized")]; tensor layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70078144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70471424))))[name = string("layers_4_self_attn_k_proj_weight_palettized")]; tensor layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70472000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70865280))))[name = string("layers_4_self_attn_v_proj_weight_palettized")]; tensor layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70865856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75584512))))[name = string("layers_4_mlp_gate_proj_weight_palettized")]; tensor layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75590720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80309376))))[name = string("layers_4_mlp_up_proj_weight_palettized")]; tensor layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80315584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85034240))))[name = string("layers_4_mlp_down_proj_weight_palettized")]; tensor layers_4_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85035840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85232512))))[name = string("layers_4_per_layer_input_gate_weight_palettized")]; tensor layers_4_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85232832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85429504))))[name = string("layers_4_per_layer_projection_weight_palettized")]; tensor layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85431104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87004032))))[name = string("layers_5_self_attn_q_proj_weight_palettized")]; tensor layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87006144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87202816))))[name = string("layers_5_self_attn_k_proj_weight_palettized")]; tensor layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87203136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87399808))))[name = string("layers_5_self_attn_v_proj_weight_palettized")]; tensor layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87400128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92118784))))[name = string("layers_5_mlp_gate_proj_weight_palettized")]; tensor layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92124992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96843648))))[name = string("layers_5_mlp_up_proj_weight_palettized")]; tensor layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96849856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101568512))))[name = string("layers_5_mlp_down_proj_weight_palettized")]; tensor layers_5_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101570112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101766784))))[name = string("layers_5_per_layer_input_gate_weight_palettized")]; tensor layers_5_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101767104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101963776))))[name = string("layers_5_per_layer_projection_weight_palettized")]; tensor layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101965376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103538304))))[name = string("layers_6_self_attn_q_proj_weight_palettized")]; tensor layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103540416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103737088))))[name = string("layers_6_self_attn_k_proj_weight_palettized")]; tensor layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103737408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103934080))))[name = string("layers_6_self_attn_v_proj_weight_palettized")]; tensor layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103934400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108653056))))[name = string("layers_6_mlp_gate_proj_weight_palettized")]; tensor layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108659264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113377920))))[name = string("layers_6_mlp_up_proj_weight_palettized")]; tensor layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113384128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118102784))))[name = string("layers_6_mlp_down_proj_weight_palettized")]; tensor layers_6_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118104384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118301056))))[name = string("layers_6_per_layer_input_gate_weight_palettized")]; tensor layers_6_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118301376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118498048))))[name = string("layers_6_per_layer_projection_weight_palettized")]; tensor layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118499648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120072576))))[name = string("layers_7_self_attn_q_proj_weight_palettized")]; tensor layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120074688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120271360))))[name = string("layers_7_self_attn_k_proj_weight_palettized")]; tensor layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120271680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120468352))))[name = string("layers_7_self_attn_v_proj_weight_palettized")]; tensor layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120468672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125187328))))[name = string("layers_7_mlp_gate_proj_weight_palettized")]; tensor layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125193536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129912192))))[name = string("layers_7_mlp_up_proj_weight_palettized")]; tensor layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129918400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134637056))))[name = string("layers_7_mlp_down_proj_weight_palettized")]; tensor layers_7_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134638656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134835328))))[name = string("layers_7_per_layer_input_gate_weight_palettized")]; tensor layers_7_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134835648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135032320))))[name = string("layers_7_per_layer_projection_weight_palettized")]; tensor layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135033920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136606848))))[name = string("layers_8_self_attn_q_proj_weight_palettized")]; tensor layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136608960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136805632))))[name = string("layers_8_self_attn_k_proj_weight_palettized")]; tensor layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136805952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137002624))))[name = string("layers_8_self_attn_v_proj_weight_palettized")]; tensor layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137002944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141721600))))[name = string("layers_8_mlp_gate_proj_weight_palettized")]; tensor layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141727808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146446464))))[name = string("layers_8_mlp_up_proj_weight_palettized")]; tensor layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146452672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151171328))))[name = string("layers_8_mlp_down_proj_weight_palettized")]; tensor layers_8_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151172928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151369600))))[name = string("layers_8_per_layer_input_gate_weight_palettized")]; tensor layers_8_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151369920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151566592))))[name = string("layers_8_per_layer_projection_weight_palettized")]; tensor layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151568192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154713984))))[name = string("layers_9_self_attn_q_proj_weight_palettized")]; tensor layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154718144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155111424))))[name = string("layers_9_self_attn_k_proj_weight_palettized")]; tensor layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155112000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155505280))))[name = string("layers_9_self_attn_v_proj_weight_palettized")]; tensor layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155505856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160224512))))[name = string("layers_9_mlp_gate_proj_weight_palettized")]; tensor layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160230720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164949376))))[name = string("layers_9_mlp_up_proj_weight_palettized")]; tensor layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164955584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169674240))))[name = string("layers_9_mlp_down_proj_weight_palettized")]; tensor layers_9_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169675840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169872512))))[name = string("layers_9_per_layer_input_gate_weight_palettized")]; tensor layers_9_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169872832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170069504))))[name = string("layers_9_per_layer_projection_weight_palettized")]; tensor layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170071104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171644032))))[name = string("layers_10_self_attn_q_proj_weight_palettized")]; tensor layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171646144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171842816))))[name = string("layers_10_self_attn_k_proj_weight_palettized")]; tensor layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171843136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172039808))))[name = string("layers_10_self_attn_v_proj_weight_palettized")]; tensor layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172040128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176758784))))[name = string("layers_10_mlp_gate_proj_weight_palettized")]; tensor layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176764992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181483648))))[name = string("layers_10_mlp_up_proj_weight_palettized")]; tensor layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181489856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186208512))))[name = string("layers_10_mlp_down_proj_weight_palettized")]; tensor layers_10_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186210112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186406784))))[name = string("layers_10_per_layer_input_gate_weight_palettized")]; tensor layers_10_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186407104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186603776))))[name = string("layers_10_per_layer_projection_weight_palettized")]; tensor layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186605376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188178304))))[name = string("layers_11_self_attn_q_proj_weight_palettized")]; tensor layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188180416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188377088))))[name = string("layers_11_self_attn_k_proj_weight_palettized")]; tensor layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188377408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188574080))))[name = string("layers_11_self_attn_v_proj_weight_palettized")]; tensor layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188574400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193293056))))[name = string("layers_11_mlp_gate_proj_weight_palettized")]; tensor layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193299264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198017920))))[name = string("layers_11_mlp_up_proj_weight_palettized")]; tensor layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198024128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202742784))))[name = string("layers_11_mlp_down_proj_weight_palettized")]; tensor layers_11_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202744384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202941056))))[name = string("layers_11_per_layer_input_gate_weight_palettized")]; tensor layers_11_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202941376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203138048))))[name = string("layers_11_per_layer_projection_weight_palettized")]; tensor layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203139648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204712576))))[name = string("layers_12_self_attn_q_proj_weight_palettized")]; tensor layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204714688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204911360))))[name = string("layers_12_self_attn_k_proj_weight_palettized")]; tensor layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204911680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205108352))))[name = string("layers_12_self_attn_v_proj_weight_palettized")]; tensor layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205108672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209827328))))[name = string("layers_12_mlp_gate_proj_weight_palettized")]; tensor layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209833536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214552192))))[name = string("layers_12_mlp_up_proj_weight_palettized")]; tensor layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214558400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219277056))))[name = string("layers_12_mlp_down_proj_weight_palettized")]; tensor layers_12_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219278656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219475328))))[name = string("layers_12_per_layer_input_gate_weight_palettized")]; tensor layers_12_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219475648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219672320))))[name = string("layers_12_per_layer_projection_weight_palettized")]; tensor layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219673920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221246848))))[name = string("layers_13_self_attn_q_proj_weight_palettized")]; tensor layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221248960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221445632))))[name = string("layers_13_self_attn_k_proj_weight_palettized")]; tensor layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221445952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221642624))))[name = string("layers_13_self_attn_v_proj_weight_palettized")]; tensor layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221642944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226361600))))[name = string("layers_13_mlp_gate_proj_weight_palettized")]; tensor layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226367808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231086464))))[name = string("layers_13_mlp_up_proj_weight_palettized")]; tensor layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231092672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235811328))))[name = string("layers_13_mlp_down_proj_weight_palettized")]; tensor layers_13_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235812928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236009600))))[name = string("layers_13_per_layer_input_gate_weight_palettized")]; tensor layers_13_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236009920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236206592))))[name = string("layers_13_per_layer_projection_weight_palettized")]; tensor layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236208192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239353984))))[name = string("layers_14_self_attn_q_proj_weight_palettized")]; tensor layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239358144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239751424))))[name = string("layers_14_self_attn_k_proj_weight_palettized")]; tensor layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239752000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240145280))))[name = string("layers_14_self_attn_v_proj_weight_palettized")]; tensor layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240145856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244864512))))[name = string("layers_14_mlp_gate_proj_weight_palettized")]; tensor layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244870720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249589376))))[name = string("layers_14_mlp_up_proj_weight_palettized")]; tensor layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249595584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254314240))))[name = string("layers_14_mlp_down_proj_weight_palettized")]; tensor layers_14_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254315840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254512512))))[name = string("layers_14_per_layer_input_gate_weight_palettized")]; tensor layers_14_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254512832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254709504))))[name = string("layers_14_per_layer_projection_weight_palettized")]; tensor layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254711104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256284032))))[name = string("layers_15_self_attn_q_proj_weight_palettized")]; tensor layers_15_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256286144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265723392))))[name = string("layers_15_mlp_gate_proj_weight_palettized")]; tensor layers_15_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265735744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275172992))))[name = string("layers_15_mlp_up_proj_weight_palettized")]; tensor layers_15_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275185344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284622592))))[name = string("layers_15_mlp_down_proj_weight_palettized")]; tensor layers_15_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284624192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284820864))))[name = string("layers_15_per_layer_input_gate_weight_palettized")]; tensor layers_15_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284821184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285017856))))[name = string("layers_15_per_layer_projection_weight_palettized")]; tensor layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285019456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286592384))))[name = string("layers_16_self_attn_q_proj_weight_palettized")]; tensor layers_16_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286594496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296031744))))[name = string("layers_16_mlp_gate_proj_weight_palettized")]; tensor layers_16_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296044096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305481344))))[name = string("layers_16_mlp_up_proj_weight_palettized")]; tensor layers_16_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305493696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314930944))))[name = string("layers_16_mlp_down_proj_weight_palettized")]; tensor layers_16_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314932544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315129216))))[name = string("layers_16_per_layer_input_gate_weight_palettized")]; tensor layers_16_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315129536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315326208))))[name = string("layers_16_per_layer_projection_weight_palettized")]; tensor layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315327808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316900736))))[name = string("layers_17_self_attn_q_proj_weight_palettized")]; tensor layers_17_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316902848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326340096))))[name = string("layers_17_mlp_gate_proj_weight_palettized")]; tensor layers_17_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326352448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335789696))))[name = string("layers_17_mlp_up_proj_weight_palettized")]; tensor layers_17_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335802048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345239296))))[name = string("layers_17_mlp_down_proj_weight_palettized")]; tensor layers_17_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345240896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345437568))))[name = string("layers_17_per_layer_input_gate_weight_palettized")]; tensor layers_17_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345437888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345634560))))[name = string("layers_17_per_layer_projection_weight_palettized")]; tensor layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345636160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347209088))))[name = string("layers_18_self_attn_q_proj_weight_palettized")]; tensor layers_18_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347211200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356648448))))[name = string("layers_18_mlp_gate_proj_weight_palettized")]; tensor layers_18_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356660800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366098048))))[name = string("layers_18_mlp_up_proj_weight_palettized")]; tensor layers_18_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366110400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375547648))))[name = string("layers_18_mlp_down_proj_weight_palettized")]; tensor layers_18_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375549248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375745920))))[name = string("layers_18_per_layer_input_gate_weight_palettized")]; tensor layers_18_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375746240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375942912))))[name = string("layers_18_per_layer_projection_weight_palettized")]; tensor layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375944512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379090304))))[name = string("layers_19_self_attn_q_proj_weight_palettized")]; tensor layers_19_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379094464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388531712))))[name = string("layers_19_mlp_gate_proj_weight_palettized")]; tensor layers_19_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388544064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397981312))))[name = string("layers_19_mlp_up_proj_weight_palettized")]; tensor layers_19_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397993664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407430912))))[name = string("layers_19_mlp_down_proj_weight_palettized")]; tensor layers_19_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407432512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407629184))))[name = string("layers_19_per_layer_input_gate_weight_palettized")]; tensor layers_19_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407629504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407826176))))[name = string("layers_19_per_layer_projection_weight_palettized")]; tensor layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407827776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409400704))))[name = string("layers_20_self_attn_q_proj_weight_palettized")]; tensor layers_20_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409402816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418840064))))[name = string("layers_20_mlp_gate_proj_weight_palettized")]; tensor layers_20_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418852416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428289664))))[name = string("layers_20_mlp_up_proj_weight_palettized")]; tensor layers_20_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428302016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437739264))))[name = string("layers_20_mlp_down_proj_weight_palettized")]; tensor layers_20_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437740864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437937536))))[name = string("layers_20_per_layer_input_gate_weight_palettized")]; tensor layers_20_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437937856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438134528))))[name = string("layers_20_per_layer_projection_weight_palettized")]; tensor layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438136128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(439709056))))[name = string("layers_21_self_attn_q_proj_weight_palettized")]; tensor layers_21_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(439711168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449148416))))[name = string("layers_21_mlp_gate_proj_weight_palettized")]; tensor layers_21_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449160768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458598016))))[name = string("layers_21_mlp_up_proj_weight_palettized")]; tensor layers_21_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458610368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(468047616))))[name = string("layers_21_mlp_down_proj_weight_palettized")]; tensor layers_21_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(468049216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(468245888))))[name = string("layers_21_per_layer_input_gate_weight_palettized")]; tensor layers_21_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(468246208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(468442880))))[name = string("layers_21_per_layer_projection_weight_palettized")]; tensor layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(468444480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470017408))))[name = string("layers_22_self_attn_q_proj_weight_palettized")]; tensor layers_22_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470019520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479456768))))[name = string("layers_22_mlp_gate_proj_weight_palettized")]; tensor layers_22_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479469120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488906368))))[name = string("layers_22_mlp_up_proj_weight_palettized")]; tensor layers_22_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488918720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498355968))))[name = string("layers_22_mlp_down_proj_weight_palettized")]; tensor layers_22_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498357568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498554240))))[name = string("layers_22_per_layer_input_gate_weight_palettized")]; tensor layers_22_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498554560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498751232))))[name = string("layers_22_per_layer_projection_weight_palettized")]; tensor layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498752832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500325760))))[name = string("layers_23_self_attn_q_proj_weight_palettized")]; tensor layers_23_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500327872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509765120))))[name = string("layers_23_mlp_gate_proj_weight_palettized")]; tensor layers_23_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509777472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519214720))))[name = string("layers_23_mlp_up_proj_weight_palettized")]; tensor layers_23_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519227072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528664320))))[name = string("layers_23_mlp_down_proj_weight_palettized")]; tensor layers_23_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528665920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528862592))))[name = string("layers_23_per_layer_input_gate_weight_palettized")]; tensor layers_23_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528862912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529059584))))[name = string("layers_23_per_layer_projection_weight_palettized")]; tensor layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529061184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532206976))))[name = string("layers_24_self_attn_q_proj_weight_palettized")]; tensor layers_24_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532211136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(541648384))))[name = string("layers_24_mlp_gate_proj_weight_palettized")]; tensor layers_24_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(541660736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551097984))))[name = string("layers_24_mlp_up_proj_weight_palettized")]; tensor layers_24_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551110336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560547584))))[name = string("layers_24_mlp_down_proj_weight_palettized")]; tensor layers_24_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560549184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560745856))))[name = string("layers_24_per_layer_input_gate_weight_palettized")]; tensor layers_24_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560746176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560942848))))[name = string("layers_24_per_layer_projection_weight_palettized")]; tensor layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560944448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562517376))))[name = string("layers_25_self_attn_q_proj_weight_palettized")]; tensor layers_25_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562519488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571956736))))[name = string("layers_25_mlp_gate_proj_weight_palettized")]; tensor layers_25_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571969088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581406336))))[name = string("layers_25_mlp_up_proj_weight_palettized")]; tensor layers_25_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581418688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(590855936))))[name = string("layers_25_mlp_down_proj_weight_palettized")]; tensor layers_25_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(590857536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591054208))))[name = string("layers_25_per_layer_input_gate_weight_palettized")]; tensor layers_25_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591054528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591251200))))[name = string("layers_25_per_layer_projection_weight_palettized")]; tensor layers_26_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591252800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(592825728))))[name = string("layers_26_self_attn_q_proj_weight_palettized")]; tensor layers_26_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(592827840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(602265088))))[name = string("layers_26_mlp_gate_proj_weight_palettized")]; tensor layers_26_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(602277440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(611714688))))[name = string("layers_26_mlp_up_proj_weight_palettized")]; tensor layers_26_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(611727040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(621164288))))[name = string("layers_26_mlp_down_proj_weight_palettized")]; tensor layers_26_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(621165888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(621362560))))[name = string("layers_26_per_layer_input_gate_weight_palettized")]; tensor layers_26_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(621362880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(621559552))))[name = string("layers_26_per_layer_projection_weight_palettized")]; tensor layers_27_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(621561152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(623134080))))[name = string("layers_27_self_attn_q_proj_weight_palettized")]; tensor layers_27_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(623136192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632573440))))[name = string("layers_27_mlp_gate_proj_weight_palettized")]; tensor layers_27_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632585792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(642023040))))[name = string("layers_27_mlp_up_proj_weight_palettized")]; tensor layers_27_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(642035392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(651472640))))[name = string("layers_27_mlp_down_proj_weight_palettized")]; tensor layers_27_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(651474240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(651670912))))[name = string("layers_27_per_layer_input_gate_weight_palettized")]; tensor layers_27_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(651671232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(651867904))))[name = string("layers_27_per_layer_projection_weight_palettized")]; tensor layers_28_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(651869504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653442432))))[name = string("layers_28_self_attn_q_proj_weight_palettized")]; tensor layers_28_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653444544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662881792))))[name = string("layers_28_mlp_gate_proj_weight_palettized")]; tensor layers_28_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662894144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(672331392))))[name = string("layers_28_mlp_up_proj_weight_palettized")]; tensor layers_28_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(672343744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681780992))))[name = string("layers_28_mlp_down_proj_weight_palettized")]; tensor layers_28_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681782592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681979264))))[name = string("layers_28_per_layer_input_gate_weight_palettized")]; tensor layers_28_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681979584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(682176256))))[name = string("layers_28_per_layer_projection_weight_palettized")]; tensor layers_29_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(682177856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(685323648))))[name = string("layers_29_self_attn_q_proj_weight_palettized")]; tensor layers_29_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(685327808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(694765056))))[name = string("layers_29_mlp_gate_proj_weight_palettized")]; tensor layers_29_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(694777408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704214656))))[name = string("layers_29_mlp_up_proj_weight_palettized")]; tensor layers_29_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704227008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(713664256))))[name = string("layers_29_mlp_down_proj_weight_palettized")]; tensor layers_29_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(713665856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(713862528))))[name = string("layers_29_per_layer_input_gate_weight_palettized")]; tensor layers_29_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(713862848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714059520))))[name = string("layers_29_per_layer_projection_weight_palettized")]; tensor layers_30_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(714061120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(715634048))))[name = string("layers_30_self_attn_q_proj_weight_palettized")]; tensor layers_30_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(715636160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725073408))))[name = string("layers_30_mlp_gate_proj_weight_palettized")]; tensor layers_30_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725085760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(734523008))))[name = string("layers_30_mlp_up_proj_weight_palettized")]; tensor layers_30_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(734535360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743972608))))[name = string("layers_30_mlp_down_proj_weight_palettized")]; tensor layers_30_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743974208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(744170880))))[name = string("layers_30_per_layer_input_gate_weight_palettized")]; tensor layers_30_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(744171200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(744367872))))[name = string("layers_30_per_layer_projection_weight_palettized")]; tensor layers_31_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(744369472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745942400))))[name = string("layers_31_self_attn_q_proj_weight_palettized")]; tensor layers_31_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745944512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755381760))))[name = string("layers_31_mlp_gate_proj_weight_palettized")]; tensor layers_31_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755394112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764831360))))[name = string("layers_31_mlp_up_proj_weight_palettized")]; tensor layers_31_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764843712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774280960))))[name = string("layers_31_mlp_down_proj_weight_palettized")]; tensor layers_31_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774282560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774479232))))[name = string("layers_31_per_layer_input_gate_weight_palettized")]; tensor layers_31_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774479552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774676224))))[name = string("layers_31_per_layer_projection_weight_palettized")]; tensor layers_32_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774677824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(776250752))))[name = string("layers_32_self_attn_q_proj_weight_palettized")]; tensor layers_32_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(776252864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(785690112))))[name = string("layers_32_mlp_gate_proj_weight_palettized")]; tensor layers_32_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(785702464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795139712))))[name = string("layers_32_mlp_up_proj_weight_palettized")]; tensor layers_32_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795152064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(804589312))))[name = string("layers_32_mlp_down_proj_weight_palettized")]; tensor layers_32_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(804590912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(804787584))))[name = string("layers_32_per_layer_input_gate_weight_palettized")]; tensor layers_32_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(804787904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(804984576))))[name = string("layers_32_per_layer_projection_weight_palettized")]; tensor layers_33_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(804986176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(806559104))))[name = string("layers_33_self_attn_q_proj_weight_palettized")]; tensor layers_33_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(806561216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(815998464))))[name = string("layers_33_mlp_gate_proj_weight_palettized")]; tensor layers_33_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816010816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(825448064))))[name = string("layers_33_mlp_up_proj_weight_palettized")]; tensor layers_33_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(825460416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(834897664))))[name = string("layers_33_mlp_down_proj_weight_palettized")]; tensor layers_33_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(834899264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835095936))))[name = string("layers_33_per_layer_input_gate_weight_palettized")]; tensor layers_33_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835096256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835292928))))[name = string("layers_33_per_layer_projection_weight_palettized")]; tensor layers_34_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835294528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(838440320))))[name = string("layers_34_self_attn_q_proj_weight_palettized")]; tensor layers_34_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(838444480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(847881728))))[name = string("layers_34_mlp_gate_proj_weight_palettized")]; tensor layers_34_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(847894080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(857331328))))[name = string("layers_34_mlp_up_proj_weight_palettized")]; tensor layers_34_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(857343680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(866780928))))[name = string("layers_34_mlp_down_proj_weight_palettized")]; tensor layers_34_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(866782528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(866979200))))[name = string("layers_34_per_layer_input_gate_weight_palettized")]; tensor layers_34_per_layer_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(866979520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(867176192))))[name = string("layers_34_per_layer_projection_weight_palettized")]; int32 var_1879_batch_dims_0 = const()[name = string("op_1879_batch_dims_0"), val = int32(0)]; bool var_1879_validate_indices_0 = const()[name = string("op_1879_validate_indices_0"), val = bool(false)]; tensor embed_tokens_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(867177792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1068504448))))[name = string("embed_tokens_weight_to_fp16_palettized")]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = input_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_70 = const()[name = string("slice_by_index_70"), val = int32(262144)]; tensor add_0 = add(x = input_ids, y = slice_by_index_70)[name = string("add_0")]; tensor select_0 = select(a = input_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)]; tensor greater_equal_0_1 = greater_equal(x = select_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(262144)]; tensor add_0_1 = add(x = select_0, y = slice_by_index_0)[name = string("add_0_1")]; tensor select_0_1 = select(a = select_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")]; int32 op_1879_cast_fp16_axis_0 = const()[name = string("op_1879_cast_fp16_axis_0"), val = int32(0)]; tensor op_1879_cast_fp16 = gather(axis = op_1879_cast_fp16_axis_0, batch_dims = var_1879_batch_dims_0, indices = select_0_1, validate_indices = var_1879_validate_indices_0, x = embed_tokens_weight_to_fp16_palettized)[name = string("op_1879_cast_fp16")]; fp16 const_0 = const()[name = string("const_0"), val = fp16(0x1.398p+5)]; tensor hidden_states_3 = mul(x = op_1879_cast_fp16, y = const_0)[name = string("hidden_states_3")]; int32 var_1897_batch_dims_0 = const()[name = string("op_1897_batch_dims_0"), val = int32(0)]; bool var_1897_validate_indices_0 = const()[name = string("op_1897_validate_indices_0"), val = bool(false)]; tensor embed_tokens_per_layer_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1068766656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2243171840))))[name = string("embed_tokens_per_layer_weight_to_fp16_palettized")]; int32 op_1897_cast_fp16_axis_0 = const()[name = string("op_1897_cast_fp16_axis_0"), val = int32(0)]; tensor op_1897_cast_fp16 = gather(axis = op_1897_cast_fp16_axis_0, batch_dims = var_1897_batch_dims_0, indices = select_0_1, validate_indices = var_1897_validate_indices_0, x = embed_tokens_per_layer_weight_to_fp16_palettized)[name = string("op_1897_cast_fp16")]; fp16 const_1 = const()[name = string("const_1"), val = fp16(0x1p+4)]; tensor per_layer_raw = mul(x = op_1897_cast_fp16, y = const_1)[name = string("per_layer_raw")]; tensor per_layer_model_projection_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2243434048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2250315392))))[name = string("per_layer_model_projection_weight_to_fp16_palettized")]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2250324416)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = per_layer_model_projection_weight_to_fp16_palettized, x = hidden_states_3)[name = string("linear_0_cast_fp16")]; fp16 const_2 = const()[name = string("const_2"), val = fp16(0x1.a2p-6)]; tensor per_layer_proj = mul(x = linear_0_cast_fp16, y = const_2)[name = string("per_layer_proj")]; tensor x_1_begin_0 = const()[name = string("x_1_begin_0"), val = tensor([0, 0, 0])]; tensor x_1_end_0 = const()[name = string("x_1_end_0"), val = tensor([1, 1, 256])]; tensor x_1_end_mask_0 = const()[name = string("x_1_end_mask_0"), val = tensor([true, true, false])]; tensor x_1 = slice_by_index(begin = x_1_begin_0, end = x_1_end_0, end_mask = x_1_end_mask_0, x = per_layer_proj)[name = string("x_1")]; int32 var_1953 = const()[name = string("op_1953"), val = int32(-1)]; fp16 const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1959_cast_fp16 = mul(x = x_1, y = const_3_promoted_to_fp16)[name = string("op_1959_cast_fp16")]; bool input_3_interleave_0 = const()[name = string("input_3_interleave_0"), val = bool(false)]; tensor input_3_cast_fp16 = concat(axis = var_1953, interleave = input_3_interleave_0, values = (x_1, var_1959_cast_fp16))[name = string("input_3_cast_fp16")]; tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; fp16 var_1951_to_fp16 = const()[name = string("op_1951_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_1951_to_fp16, x = input_3_cast_fp16)[name = string("normed_1_cast_fp16")]; tensor var_1964_split_sizes_0 = const()[name = string("op_1964_split_sizes_0"), val = tensor([256, 256])]; int32 var_1964_axis_0 = const()[name = string("op_1964_axis_0"), val = int32(-1)]; tensor var_1964_cast_fp16_0, tensor var_1964_cast_fp16_1 = split(axis = var_1964_axis_0, split_sizes = var_1964_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_1964_cast_fp16")]; tensor const_4_to_fp16 = const()[name = string("const_4_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2250342400)))]; tensor var_1967_cast_fp16 = mul(x = var_1964_cast_fp16_0, y = const_4_to_fp16)[name = string("op_1967_cast_fp16")]; tensor x_5_begin_0 = const()[name = string("x_5_begin_0"), val = tensor([0, 0, 256])]; tensor x_5_end_0 = const()[name = string("x_5_end_0"), val = tensor([1, 1, 512])]; tensor x_5_end_mask_0 = const()[name = string("x_5_end_mask_0"), val = tensor([true, true, false])]; tensor x_5 = slice_by_index(begin = x_5_begin_0, end = x_5_end_0, end_mask = x_5_end_mask_0, x = per_layer_proj)[name = string("x_5")]; int32 var_1991 = const()[name = string("op_1991"), val = int32(-1)]; fp16 const_5_promoted_to_fp16 = const()[name = string("const_5_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1996_cast_fp16 = mul(x = x_5, y = const_5_promoted_to_fp16)[name = string("op_1996_cast_fp16")]; bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; tensor input_5_cast_fp16 = concat(axis = var_1991, interleave = input_5_interleave_0, values = (x_5, var_1996_cast_fp16))[name = string("input_5_cast_fp16")]; tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; fp16 var_1989_to_fp16 = const()[name = string("op_1989_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_1989_to_fp16, x = input_5_cast_fp16)[name = string("normed_5_cast_fp16")]; tensor var_2001_split_sizes_0 = const()[name = string("op_2001_split_sizes_0"), val = tensor([256, 256])]; int32 var_2001_axis_0 = const()[name = string("op_2001_axis_0"), val = int32(-1)]; tensor var_2001_cast_fp16_0, tensor var_2001_cast_fp16_1 = split(axis = var_2001_axis_0, split_sizes = var_2001_split_sizes_0, x = normed_5_cast_fp16)[name = string("op_2001_cast_fp16")]; tensor var_2004_cast_fp16 = mul(x = var_2001_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2004_cast_fp16")]; tensor x_9_begin_0 = const()[name = string("x_9_begin_0"), val = tensor([0, 0, 512])]; tensor x_9_end_0 = const()[name = string("x_9_end_0"), val = tensor([1, 1, 768])]; tensor x_9_end_mask_0 = const()[name = string("x_9_end_mask_0"), val = tensor([true, true, false])]; tensor x_9 = slice_by_index(begin = x_9_begin_0, end = x_9_end_0, end_mask = x_9_end_mask_0, x = per_layer_proj)[name = string("x_9")]; int32 var_2028 = const()[name = string("op_2028"), val = int32(-1)]; fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2033_cast_fp16 = mul(x = x_9, y = const_7_promoted_to_fp16)[name = string("op_2033_cast_fp16")]; bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; tensor input_7_cast_fp16 = concat(axis = var_2028, interleave = input_7_interleave_0, values = (x_9, var_2033_cast_fp16))[name = string("input_7_cast_fp16")]; tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; fp16 var_2026_to_fp16 = const()[name = string("op_2026_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_2026_to_fp16, x = input_7_cast_fp16)[name = string("normed_9_cast_fp16")]; tensor var_2038_split_sizes_0 = const()[name = string("op_2038_split_sizes_0"), val = tensor([256, 256])]; int32 var_2038_axis_0 = const()[name = string("op_2038_axis_0"), val = int32(-1)]; tensor var_2038_cast_fp16_0, tensor var_2038_cast_fp16_1 = split(axis = var_2038_axis_0, split_sizes = var_2038_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_2038_cast_fp16")]; tensor var_2041_cast_fp16 = mul(x = var_2038_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2041_cast_fp16")]; tensor x_13_begin_0 = const()[name = string("x_13_begin_0"), val = tensor([0, 0, 768])]; tensor x_13_end_0 = const()[name = string("x_13_end_0"), val = tensor([1, 1, 1024])]; tensor x_13_end_mask_0 = const()[name = string("x_13_end_mask_0"), val = tensor([true, true, false])]; tensor x_13 = slice_by_index(begin = x_13_begin_0, end = x_13_end_0, end_mask = x_13_end_mask_0, x = per_layer_proj)[name = string("x_13")]; int32 var_2065 = const()[name = string("op_2065"), val = int32(-1)]; fp16 const_9_promoted_to_fp16 = const()[name = string("const_9_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2070_cast_fp16 = mul(x = x_13, y = const_9_promoted_to_fp16)[name = string("op_2070_cast_fp16")]; bool input_9_interleave_0 = const()[name = string("input_9_interleave_0"), val = bool(false)]; tensor input_9_cast_fp16 = concat(axis = var_2065, interleave = input_9_interleave_0, values = (x_13, var_2070_cast_fp16))[name = string("input_9_cast_fp16")]; tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; fp16 var_2063_to_fp16 = const()[name = string("op_2063_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_2063_to_fp16, x = input_9_cast_fp16)[name = string("normed_13_cast_fp16")]; tensor var_2075_split_sizes_0 = const()[name = string("op_2075_split_sizes_0"), val = tensor([256, 256])]; int32 var_2075_axis_0 = const()[name = string("op_2075_axis_0"), val = int32(-1)]; tensor var_2075_cast_fp16_0, tensor var_2075_cast_fp16_1 = split(axis = var_2075_axis_0, split_sizes = var_2075_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_2075_cast_fp16")]; tensor var_2078_cast_fp16 = mul(x = var_2075_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2078_cast_fp16")]; tensor x_17_begin_0 = const()[name = string("x_17_begin_0"), val = tensor([0, 0, 1024])]; tensor x_17_end_0 = const()[name = string("x_17_end_0"), val = tensor([1, 1, 1280])]; tensor x_17_end_mask_0 = const()[name = string("x_17_end_mask_0"), val = tensor([true, true, false])]; tensor x_17 = slice_by_index(begin = x_17_begin_0, end = x_17_end_0, end_mask = x_17_end_mask_0, x = per_layer_proj)[name = string("x_17")]; int32 var_2102 = const()[name = string("op_2102"), val = int32(-1)]; fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2107_cast_fp16 = mul(x = x_17, y = const_11_promoted_to_fp16)[name = string("op_2107_cast_fp16")]; bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; tensor input_11_cast_fp16 = concat(axis = var_2102, interleave = input_11_interleave_0, values = (x_17, var_2107_cast_fp16))[name = string("input_11_cast_fp16")]; tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; fp16 var_2100_to_fp16 = const()[name = string("op_2100_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_2100_to_fp16, x = input_11_cast_fp16)[name = string("normed_17_cast_fp16")]; tensor var_2112_split_sizes_0 = const()[name = string("op_2112_split_sizes_0"), val = tensor([256, 256])]; int32 var_2112_axis_0 = const()[name = string("op_2112_axis_0"), val = int32(-1)]; tensor var_2112_cast_fp16_0, tensor var_2112_cast_fp16_1 = split(axis = var_2112_axis_0, split_sizes = var_2112_split_sizes_0, x = normed_17_cast_fp16)[name = string("op_2112_cast_fp16")]; tensor var_2115_cast_fp16 = mul(x = var_2112_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2115_cast_fp16")]; tensor x_21_begin_0 = const()[name = string("x_21_begin_0"), val = tensor([0, 0, 1280])]; tensor x_21_end_0 = const()[name = string("x_21_end_0"), val = tensor([1, 1, 1536])]; tensor x_21_end_mask_0 = const()[name = string("x_21_end_mask_0"), val = tensor([true, true, false])]; tensor x_21 = slice_by_index(begin = x_21_begin_0, end = x_21_end_0, end_mask = x_21_end_mask_0, x = per_layer_proj)[name = string("x_21")]; int32 var_2139 = const()[name = string("op_2139"), val = int32(-1)]; fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2144_cast_fp16 = mul(x = x_21, y = const_13_promoted_to_fp16)[name = string("op_2144_cast_fp16")]; bool input_13_interleave_0 = const()[name = string("input_13_interleave_0"), val = bool(false)]; tensor input_13_cast_fp16 = concat(axis = var_2139, interleave = input_13_interleave_0, values = (x_21, var_2144_cast_fp16))[name = string("input_13_cast_fp16")]; tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; fp16 var_2137_to_fp16 = const()[name = string("op_2137_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_2137_to_fp16, x = input_13_cast_fp16)[name = string("normed_21_cast_fp16")]; tensor var_2149_split_sizes_0 = const()[name = string("op_2149_split_sizes_0"), val = tensor([256, 256])]; int32 var_2149_axis_0 = const()[name = string("op_2149_axis_0"), val = int32(-1)]; tensor var_2149_cast_fp16_0, tensor var_2149_cast_fp16_1 = split(axis = var_2149_axis_0, split_sizes = var_2149_split_sizes_0, x = normed_21_cast_fp16)[name = string("op_2149_cast_fp16")]; tensor var_2152_cast_fp16 = mul(x = var_2149_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2152_cast_fp16")]; tensor x_25_begin_0 = const()[name = string("x_25_begin_0"), val = tensor([0, 0, 1536])]; tensor x_25_end_0 = const()[name = string("x_25_end_0"), val = tensor([1, 1, 1792])]; tensor x_25_end_mask_0 = const()[name = string("x_25_end_mask_0"), val = tensor([true, true, false])]; tensor x_25 = slice_by_index(begin = x_25_begin_0, end = x_25_end_0, end_mask = x_25_end_mask_0, x = per_layer_proj)[name = string("x_25")]; int32 var_2176 = const()[name = string("op_2176"), val = int32(-1)]; fp16 const_15_promoted_to_fp16 = const()[name = string("const_15_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2181_cast_fp16 = mul(x = x_25, y = const_15_promoted_to_fp16)[name = string("op_2181_cast_fp16")]; bool input_15_interleave_0 = const()[name = string("input_15_interleave_0"), val = bool(false)]; tensor input_15_cast_fp16 = concat(axis = var_2176, interleave = input_15_interleave_0, values = (x_25, var_2181_cast_fp16))[name = string("input_15_cast_fp16")]; tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; fp16 var_2174_to_fp16 = const()[name = string("op_2174_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_2174_to_fp16, x = input_15_cast_fp16)[name = string("normed_25_cast_fp16")]; tensor var_2186_split_sizes_0 = const()[name = string("op_2186_split_sizes_0"), val = tensor([256, 256])]; int32 var_2186_axis_0 = const()[name = string("op_2186_axis_0"), val = int32(-1)]; tensor var_2186_cast_fp16_0, tensor var_2186_cast_fp16_1 = split(axis = var_2186_axis_0, split_sizes = var_2186_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_2186_cast_fp16")]; tensor var_2189_cast_fp16 = mul(x = var_2186_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2189_cast_fp16")]; tensor x_29_begin_0 = const()[name = string("x_29_begin_0"), val = tensor([0, 0, 1792])]; tensor x_29_end_0 = const()[name = string("x_29_end_0"), val = tensor([1, 1, 2048])]; tensor x_29_end_mask_0 = const()[name = string("x_29_end_mask_0"), val = tensor([true, true, false])]; tensor x_29 = slice_by_index(begin = x_29_begin_0, end = x_29_end_0, end_mask = x_29_end_mask_0, x = per_layer_proj)[name = string("x_29")]; int32 var_2213 = const()[name = string("op_2213"), val = int32(-1)]; fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2218_cast_fp16 = mul(x = x_29, y = const_17_promoted_to_fp16)[name = string("op_2218_cast_fp16")]; bool input_17_interleave_0 = const()[name = string("input_17_interleave_0"), val = bool(false)]; tensor input_17_cast_fp16 = concat(axis = var_2213, interleave = input_17_interleave_0, values = (x_29, var_2218_cast_fp16))[name = string("input_17_cast_fp16")]; tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; fp16 var_2211_to_fp16 = const()[name = string("op_2211_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_2211_to_fp16, x = input_17_cast_fp16)[name = string("normed_29_cast_fp16")]; tensor var_2223_split_sizes_0 = const()[name = string("op_2223_split_sizes_0"), val = tensor([256, 256])]; int32 var_2223_axis_0 = const()[name = string("op_2223_axis_0"), val = int32(-1)]; tensor var_2223_cast_fp16_0, tensor var_2223_cast_fp16_1 = split(axis = var_2223_axis_0, split_sizes = var_2223_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_2223_cast_fp16")]; tensor var_2226_cast_fp16 = mul(x = var_2223_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2226_cast_fp16")]; tensor x_33_begin_0 = const()[name = string("x_33_begin_0"), val = tensor([0, 0, 2048])]; tensor x_33_end_0 = const()[name = string("x_33_end_0"), val = tensor([1, 1, 2304])]; tensor x_33_end_mask_0 = const()[name = string("x_33_end_mask_0"), val = tensor([true, true, false])]; tensor x_33 = slice_by_index(begin = x_33_begin_0, end = x_33_end_0, end_mask = x_33_end_mask_0, x = per_layer_proj)[name = string("x_33")]; int32 var_2250 = const()[name = string("op_2250"), val = int32(-1)]; fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2255_cast_fp16 = mul(x = x_33, y = const_19_promoted_to_fp16)[name = string("op_2255_cast_fp16")]; bool input_19_interleave_0 = const()[name = string("input_19_interleave_0"), val = bool(false)]; tensor input_19_cast_fp16 = concat(axis = var_2250, interleave = input_19_interleave_0, values = (x_33, var_2255_cast_fp16))[name = string("input_19_cast_fp16")]; tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; fp16 var_2248_to_fp16 = const()[name = string("op_2248_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_2248_to_fp16, x = input_19_cast_fp16)[name = string("normed_33_cast_fp16")]; tensor var_2260_split_sizes_0 = const()[name = string("op_2260_split_sizes_0"), val = tensor([256, 256])]; int32 var_2260_axis_0 = const()[name = string("op_2260_axis_0"), val = int32(-1)]; tensor var_2260_cast_fp16_0, tensor var_2260_cast_fp16_1 = split(axis = var_2260_axis_0, split_sizes = var_2260_split_sizes_0, x = normed_33_cast_fp16)[name = string("op_2260_cast_fp16")]; tensor var_2263_cast_fp16 = mul(x = var_2260_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2263_cast_fp16")]; tensor x_37_begin_0 = const()[name = string("x_37_begin_0"), val = tensor([0, 0, 2304])]; tensor x_37_end_0 = const()[name = string("x_37_end_0"), val = tensor([1, 1, 2560])]; tensor x_37_end_mask_0 = const()[name = string("x_37_end_mask_0"), val = tensor([true, true, false])]; tensor x_37 = slice_by_index(begin = x_37_begin_0, end = x_37_end_0, end_mask = x_37_end_mask_0, x = per_layer_proj)[name = string("x_37")]; int32 var_2287 = const()[name = string("op_2287"), val = int32(-1)]; fp16 const_21_promoted_to_fp16 = const()[name = string("const_21_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2292_cast_fp16 = mul(x = x_37, y = const_21_promoted_to_fp16)[name = string("op_2292_cast_fp16")]; bool input_21_interleave_0 = const()[name = string("input_21_interleave_0"), val = bool(false)]; tensor input_21_cast_fp16 = concat(axis = var_2287, interleave = input_21_interleave_0, values = (x_37, var_2292_cast_fp16))[name = string("input_21_cast_fp16")]; tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; fp16 var_2285_to_fp16 = const()[name = string("op_2285_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_2285_to_fp16, x = input_21_cast_fp16)[name = string("normed_37_cast_fp16")]; tensor var_2297_split_sizes_0 = const()[name = string("op_2297_split_sizes_0"), val = tensor([256, 256])]; int32 var_2297_axis_0 = const()[name = string("op_2297_axis_0"), val = int32(-1)]; tensor var_2297_cast_fp16_0, tensor var_2297_cast_fp16_1 = split(axis = var_2297_axis_0, split_sizes = var_2297_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_2297_cast_fp16")]; tensor var_2300_cast_fp16 = mul(x = var_2297_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2300_cast_fp16")]; tensor x_41_begin_0 = const()[name = string("x_41_begin_0"), val = tensor([0, 0, 2560])]; tensor x_41_end_0 = const()[name = string("x_41_end_0"), val = tensor([1, 1, 2816])]; tensor x_41_end_mask_0 = const()[name = string("x_41_end_mask_0"), val = tensor([true, true, false])]; tensor x_41 = slice_by_index(begin = x_41_begin_0, end = x_41_end_0, end_mask = x_41_end_mask_0, x = per_layer_proj)[name = string("x_41")]; int32 var_2324 = const()[name = string("op_2324"), val = int32(-1)]; fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2329_cast_fp16 = mul(x = x_41, y = const_23_promoted_to_fp16)[name = string("op_2329_cast_fp16")]; bool input_23_interleave_0 = const()[name = string("input_23_interleave_0"), val = bool(false)]; tensor input_23_cast_fp16 = concat(axis = var_2324, interleave = input_23_interleave_0, values = (x_41, var_2329_cast_fp16))[name = string("input_23_cast_fp16")]; tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; fp16 var_2322_to_fp16 = const()[name = string("op_2322_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_2322_to_fp16, x = input_23_cast_fp16)[name = string("normed_41_cast_fp16")]; tensor var_2334_split_sizes_0 = const()[name = string("op_2334_split_sizes_0"), val = tensor([256, 256])]; int32 var_2334_axis_0 = const()[name = string("op_2334_axis_0"), val = int32(-1)]; tensor var_2334_cast_fp16_0, tensor var_2334_cast_fp16_1 = split(axis = var_2334_axis_0, split_sizes = var_2334_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_2334_cast_fp16")]; tensor var_2337_cast_fp16 = mul(x = var_2334_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2337_cast_fp16")]; tensor x_45_begin_0 = const()[name = string("x_45_begin_0"), val = tensor([0, 0, 2816])]; tensor x_45_end_0 = const()[name = string("x_45_end_0"), val = tensor([1, 1, 3072])]; tensor x_45_end_mask_0 = const()[name = string("x_45_end_mask_0"), val = tensor([true, true, false])]; tensor x_45 = slice_by_index(begin = x_45_begin_0, end = x_45_end_0, end_mask = x_45_end_mask_0, x = per_layer_proj)[name = string("x_45")]; int32 var_2361 = const()[name = string("op_2361"), val = int32(-1)]; fp16 const_25_promoted_to_fp16 = const()[name = string("const_25_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2366_cast_fp16 = mul(x = x_45, y = const_25_promoted_to_fp16)[name = string("op_2366_cast_fp16")]; bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; tensor input_25_cast_fp16 = concat(axis = var_2361, interleave = input_25_interleave_0, values = (x_45, var_2366_cast_fp16))[name = string("input_25_cast_fp16")]; tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; fp16 var_2359_to_fp16 = const()[name = string("op_2359_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_2359_to_fp16, x = input_25_cast_fp16)[name = string("normed_45_cast_fp16")]; tensor var_2371_split_sizes_0 = const()[name = string("op_2371_split_sizes_0"), val = tensor([256, 256])]; int32 var_2371_axis_0 = const()[name = string("op_2371_axis_0"), val = int32(-1)]; tensor var_2371_cast_fp16_0, tensor var_2371_cast_fp16_1 = split(axis = var_2371_axis_0, split_sizes = var_2371_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_2371_cast_fp16")]; tensor var_2374_cast_fp16 = mul(x = var_2371_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2374_cast_fp16")]; tensor x_49_begin_0 = const()[name = string("x_49_begin_0"), val = tensor([0, 0, 3072])]; tensor x_49_end_0 = const()[name = string("x_49_end_0"), val = tensor([1, 1, 3328])]; tensor x_49_end_mask_0 = const()[name = string("x_49_end_mask_0"), val = tensor([true, true, false])]; tensor x_49 = slice_by_index(begin = x_49_begin_0, end = x_49_end_0, end_mask = x_49_end_mask_0, x = per_layer_proj)[name = string("x_49")]; int32 var_2398 = const()[name = string("op_2398"), val = int32(-1)]; fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2403_cast_fp16 = mul(x = x_49, y = const_27_promoted_to_fp16)[name = string("op_2403_cast_fp16")]; bool input_27_interleave_0 = const()[name = string("input_27_interleave_0"), val = bool(false)]; tensor input_27_cast_fp16 = concat(axis = var_2398, interleave = input_27_interleave_0, values = (x_49, var_2403_cast_fp16))[name = string("input_27_cast_fp16")]; tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; fp16 var_2396_to_fp16 = const()[name = string("op_2396_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_2396_to_fp16, x = input_27_cast_fp16)[name = string("normed_49_cast_fp16")]; tensor var_2408_split_sizes_0 = const()[name = string("op_2408_split_sizes_0"), val = tensor([256, 256])]; int32 var_2408_axis_0 = const()[name = string("op_2408_axis_0"), val = int32(-1)]; tensor var_2408_cast_fp16_0, tensor var_2408_cast_fp16_1 = split(axis = var_2408_axis_0, split_sizes = var_2408_split_sizes_0, x = normed_49_cast_fp16)[name = string("op_2408_cast_fp16")]; tensor var_2411_cast_fp16 = mul(x = var_2408_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2411_cast_fp16")]; tensor x_53_begin_0 = const()[name = string("x_53_begin_0"), val = tensor([0, 0, 3328])]; tensor x_53_end_0 = const()[name = string("x_53_end_0"), val = tensor([1, 1, 3584])]; tensor x_53_end_mask_0 = const()[name = string("x_53_end_mask_0"), val = tensor([true, true, false])]; tensor x_53 = slice_by_index(begin = x_53_begin_0, end = x_53_end_0, end_mask = x_53_end_mask_0, x = per_layer_proj)[name = string("x_53")]; int32 var_2435 = const()[name = string("op_2435"), val = int32(-1)]; fp16 const_29_promoted_to_fp16 = const()[name = string("const_29_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2440_cast_fp16 = mul(x = x_53, y = const_29_promoted_to_fp16)[name = string("op_2440_cast_fp16")]; bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)]; tensor input_29_cast_fp16 = concat(axis = var_2435, interleave = input_29_interleave_0, values = (x_53, var_2440_cast_fp16))[name = string("input_29_cast_fp16")]; tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; fp16 var_2433_to_fp16 = const()[name = string("op_2433_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_2433_to_fp16, x = input_29_cast_fp16)[name = string("normed_53_cast_fp16")]; tensor var_2445_split_sizes_0 = const()[name = string("op_2445_split_sizes_0"), val = tensor([256, 256])]; int32 var_2445_axis_0 = const()[name = string("op_2445_axis_0"), val = int32(-1)]; tensor var_2445_cast_fp16_0, tensor var_2445_cast_fp16_1 = split(axis = var_2445_axis_0, split_sizes = var_2445_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_2445_cast_fp16")]; tensor var_2448_cast_fp16 = mul(x = var_2445_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2448_cast_fp16")]; tensor x_57_begin_0 = const()[name = string("x_57_begin_0"), val = tensor([0, 0, 3584])]; tensor x_57_end_0 = const()[name = string("x_57_end_0"), val = tensor([1, 1, 3840])]; tensor x_57_end_mask_0 = const()[name = string("x_57_end_mask_0"), val = tensor([true, true, false])]; tensor x_57 = slice_by_index(begin = x_57_begin_0, end = x_57_end_0, end_mask = x_57_end_mask_0, x = per_layer_proj)[name = string("x_57")]; int32 var_2472 = const()[name = string("op_2472"), val = int32(-1)]; fp16 const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2477_cast_fp16 = mul(x = x_57, y = const_31_promoted_to_fp16)[name = string("op_2477_cast_fp16")]; bool input_31_interleave_0 = const()[name = string("input_31_interleave_0"), val = bool(false)]; tensor input_31_cast_fp16 = concat(axis = var_2472, interleave = input_31_interleave_0, values = (x_57, var_2477_cast_fp16))[name = string("input_31_cast_fp16")]; tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; fp16 var_2470_to_fp16 = const()[name = string("op_2470_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_2470_to_fp16, x = input_31_cast_fp16)[name = string("normed_57_cast_fp16")]; tensor var_2482_split_sizes_0 = const()[name = string("op_2482_split_sizes_0"), val = tensor([256, 256])]; int32 var_2482_axis_0 = const()[name = string("op_2482_axis_0"), val = int32(-1)]; tensor var_2482_cast_fp16_0, tensor var_2482_cast_fp16_1 = split(axis = var_2482_axis_0, split_sizes = var_2482_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_2482_cast_fp16")]; tensor var_2485_cast_fp16 = mul(x = var_2482_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2485_cast_fp16")]; tensor x_61_begin_0 = const()[name = string("x_61_begin_0"), val = tensor([0, 0, 3840])]; tensor x_61_end_0 = const()[name = string("x_61_end_0"), val = tensor([1, 1, 4096])]; tensor x_61_end_mask_0 = const()[name = string("x_61_end_mask_0"), val = tensor([true, true, false])]; tensor x_61 = slice_by_index(begin = x_61_begin_0, end = x_61_end_0, end_mask = x_61_end_mask_0, x = per_layer_proj)[name = string("x_61")]; int32 var_2509 = const()[name = string("op_2509"), val = int32(-1)]; fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2514_cast_fp16 = mul(x = x_61, y = const_33_promoted_to_fp16)[name = string("op_2514_cast_fp16")]; bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; tensor input_33_cast_fp16 = concat(axis = var_2509, interleave = input_33_interleave_0, values = (x_61, var_2514_cast_fp16))[name = string("input_33_cast_fp16")]; tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; fp16 var_2507_to_fp16 = const()[name = string("op_2507_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_2507_to_fp16, x = input_33_cast_fp16)[name = string("normed_61_cast_fp16")]; tensor var_2519_split_sizes_0 = const()[name = string("op_2519_split_sizes_0"), val = tensor([256, 256])]; int32 var_2519_axis_0 = const()[name = string("op_2519_axis_0"), val = int32(-1)]; tensor var_2519_cast_fp16_0, tensor var_2519_cast_fp16_1 = split(axis = var_2519_axis_0, split_sizes = var_2519_split_sizes_0, x = normed_61_cast_fp16)[name = string("op_2519_cast_fp16")]; tensor var_2522_cast_fp16 = mul(x = var_2519_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2522_cast_fp16")]; tensor x_65_begin_0 = const()[name = string("x_65_begin_0"), val = tensor([0, 0, 4096])]; tensor x_65_end_0 = const()[name = string("x_65_end_0"), val = tensor([1, 1, 4352])]; tensor x_65_end_mask_0 = const()[name = string("x_65_end_mask_0"), val = tensor([true, true, false])]; tensor x_65 = slice_by_index(begin = x_65_begin_0, end = x_65_end_0, end_mask = x_65_end_mask_0, x = per_layer_proj)[name = string("x_65")]; int32 var_2546 = const()[name = string("op_2546"), val = int32(-1)]; fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2551_cast_fp16 = mul(x = x_65, y = const_35_promoted_to_fp16)[name = string("op_2551_cast_fp16")]; bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)]; tensor input_35_cast_fp16 = concat(axis = var_2546, interleave = input_35_interleave_0, values = (x_65, var_2551_cast_fp16))[name = string("input_35_cast_fp16")]; tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; fp16 var_2544_to_fp16 = const()[name = string("op_2544_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_2544_to_fp16, x = input_35_cast_fp16)[name = string("normed_65_cast_fp16")]; tensor var_2556_split_sizes_0 = const()[name = string("op_2556_split_sizes_0"), val = tensor([256, 256])]; int32 var_2556_axis_0 = const()[name = string("op_2556_axis_0"), val = int32(-1)]; tensor var_2556_cast_fp16_0, tensor var_2556_cast_fp16_1 = split(axis = var_2556_axis_0, split_sizes = var_2556_split_sizes_0, x = normed_65_cast_fp16)[name = string("op_2556_cast_fp16")]; tensor var_2559_cast_fp16 = mul(x = var_2556_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2559_cast_fp16")]; tensor x_69_begin_0 = const()[name = string("x_69_begin_0"), val = tensor([0, 0, 4352])]; tensor x_69_end_0 = const()[name = string("x_69_end_0"), val = tensor([1, 1, 4608])]; tensor x_69_end_mask_0 = const()[name = string("x_69_end_mask_0"), val = tensor([true, true, false])]; tensor x_69 = slice_by_index(begin = x_69_begin_0, end = x_69_end_0, end_mask = x_69_end_mask_0, x = per_layer_proj)[name = string("x_69")]; int32 var_2583 = const()[name = string("op_2583"), val = int32(-1)]; fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2588_cast_fp16 = mul(x = x_69, y = const_37_promoted_to_fp16)[name = string("op_2588_cast_fp16")]; bool input_37_interleave_0 = const()[name = string("input_37_interleave_0"), val = bool(false)]; tensor input_37_cast_fp16 = concat(axis = var_2583, interleave = input_37_interleave_0, values = (x_69, var_2588_cast_fp16))[name = string("input_37_cast_fp16")]; tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; fp16 var_2581_to_fp16 = const()[name = string("op_2581_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_2581_to_fp16, x = input_37_cast_fp16)[name = string("normed_69_cast_fp16")]; tensor var_2593_split_sizes_0 = const()[name = string("op_2593_split_sizes_0"), val = tensor([256, 256])]; int32 var_2593_axis_0 = const()[name = string("op_2593_axis_0"), val = int32(-1)]; tensor var_2593_cast_fp16_0, tensor var_2593_cast_fp16_1 = split(axis = var_2593_axis_0, split_sizes = var_2593_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_2593_cast_fp16")]; tensor var_2596_cast_fp16 = mul(x = var_2593_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2596_cast_fp16")]; tensor x_73_begin_0 = const()[name = string("x_73_begin_0"), val = tensor([0, 0, 4608])]; tensor x_73_end_0 = const()[name = string("x_73_end_0"), val = tensor([1, 1, 4864])]; tensor x_73_end_mask_0 = const()[name = string("x_73_end_mask_0"), val = tensor([true, true, false])]; tensor x_73 = slice_by_index(begin = x_73_begin_0, end = x_73_end_0, end_mask = x_73_end_mask_0, x = per_layer_proj)[name = string("x_73")]; int32 var_2620 = const()[name = string("op_2620"), val = int32(-1)]; fp16 const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2625_cast_fp16 = mul(x = x_73, y = const_39_promoted_to_fp16)[name = string("op_2625_cast_fp16")]; bool input_39_interleave_0 = const()[name = string("input_39_interleave_0"), val = bool(false)]; tensor input_39_cast_fp16 = concat(axis = var_2620, interleave = input_39_interleave_0, values = (x_73, var_2625_cast_fp16))[name = string("input_39_cast_fp16")]; tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; fp16 var_2618_to_fp16 = const()[name = string("op_2618_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_2618_to_fp16, x = input_39_cast_fp16)[name = string("normed_73_cast_fp16")]; tensor var_2630_split_sizes_0 = const()[name = string("op_2630_split_sizes_0"), val = tensor([256, 256])]; int32 var_2630_axis_0 = const()[name = string("op_2630_axis_0"), val = int32(-1)]; tensor var_2630_cast_fp16_0, tensor var_2630_cast_fp16_1 = split(axis = var_2630_axis_0, split_sizes = var_2630_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_2630_cast_fp16")]; tensor var_2633_cast_fp16 = mul(x = var_2630_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2633_cast_fp16")]; tensor x_77_begin_0 = const()[name = string("x_77_begin_0"), val = tensor([0, 0, 4864])]; tensor x_77_end_0 = const()[name = string("x_77_end_0"), val = tensor([1, 1, 5120])]; tensor x_77_end_mask_0 = const()[name = string("x_77_end_mask_0"), val = tensor([true, true, false])]; tensor x_77 = slice_by_index(begin = x_77_begin_0, end = x_77_end_0, end_mask = x_77_end_mask_0, x = per_layer_proj)[name = string("x_77")]; int32 var_2657 = const()[name = string("op_2657"), val = int32(-1)]; fp16 const_41_promoted_to_fp16 = const()[name = string("const_41_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2662_cast_fp16 = mul(x = x_77, y = const_41_promoted_to_fp16)[name = string("op_2662_cast_fp16")]; bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; tensor input_41_cast_fp16 = concat(axis = var_2657, interleave = input_41_interleave_0, values = (x_77, var_2662_cast_fp16))[name = string("input_41_cast_fp16")]; tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; fp16 var_2655_to_fp16 = const()[name = string("op_2655_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_2655_to_fp16, x = input_41_cast_fp16)[name = string("normed_77_cast_fp16")]; tensor var_2667_split_sizes_0 = const()[name = string("op_2667_split_sizes_0"), val = tensor([256, 256])]; int32 var_2667_axis_0 = const()[name = string("op_2667_axis_0"), val = int32(-1)]; tensor var_2667_cast_fp16_0, tensor var_2667_cast_fp16_1 = split(axis = var_2667_axis_0, split_sizes = var_2667_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_2667_cast_fp16")]; tensor var_2670_cast_fp16 = mul(x = var_2667_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2670_cast_fp16")]; tensor x_81_begin_0 = const()[name = string("x_81_begin_0"), val = tensor([0, 0, 5120])]; tensor x_81_end_0 = const()[name = string("x_81_end_0"), val = tensor([1, 1, 5376])]; tensor x_81_end_mask_0 = const()[name = string("x_81_end_mask_0"), val = tensor([true, true, false])]; tensor x_81 = slice_by_index(begin = x_81_begin_0, end = x_81_end_0, end_mask = x_81_end_mask_0, x = per_layer_proj)[name = string("x_81")]; int32 var_2694 = const()[name = string("op_2694"), val = int32(-1)]; fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2699_cast_fp16 = mul(x = x_81, y = const_43_promoted_to_fp16)[name = string("op_2699_cast_fp16")]; bool input_43_interleave_0 = const()[name = string("input_43_interleave_0"), val = bool(false)]; tensor input_43_cast_fp16 = concat(axis = var_2694, interleave = input_43_interleave_0, values = (x_81, var_2699_cast_fp16))[name = string("input_43_cast_fp16")]; tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; fp16 var_2692_to_fp16 = const()[name = string("op_2692_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_2692_to_fp16, x = input_43_cast_fp16)[name = string("normed_81_cast_fp16")]; tensor var_2704_split_sizes_0 = const()[name = string("op_2704_split_sizes_0"), val = tensor([256, 256])]; int32 var_2704_axis_0 = const()[name = string("op_2704_axis_0"), val = int32(-1)]; tensor var_2704_cast_fp16_0, tensor var_2704_cast_fp16_1 = split(axis = var_2704_axis_0, split_sizes = var_2704_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_2704_cast_fp16")]; tensor var_2707_cast_fp16 = mul(x = var_2704_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2707_cast_fp16")]; tensor x_85_begin_0 = const()[name = string("x_85_begin_0"), val = tensor([0, 0, 5376])]; tensor x_85_end_0 = const()[name = string("x_85_end_0"), val = tensor([1, 1, 5632])]; tensor x_85_end_mask_0 = const()[name = string("x_85_end_mask_0"), val = tensor([true, true, false])]; tensor x_85 = slice_by_index(begin = x_85_begin_0, end = x_85_end_0, end_mask = x_85_end_mask_0, x = per_layer_proj)[name = string("x_85")]; int32 var_2731 = const()[name = string("op_2731"), val = int32(-1)]; fp16 const_45_promoted_to_fp16 = const()[name = string("const_45_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2736_cast_fp16 = mul(x = x_85, y = const_45_promoted_to_fp16)[name = string("op_2736_cast_fp16")]; bool input_45_interleave_0 = const()[name = string("input_45_interleave_0"), val = bool(false)]; tensor input_45_cast_fp16 = concat(axis = var_2731, interleave = input_45_interleave_0, values = (x_85, var_2736_cast_fp16))[name = string("input_45_cast_fp16")]; tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; fp16 var_2729_to_fp16 = const()[name = string("op_2729_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_2729_to_fp16, x = input_45_cast_fp16)[name = string("normed_85_cast_fp16")]; tensor var_2741_split_sizes_0 = const()[name = string("op_2741_split_sizes_0"), val = tensor([256, 256])]; int32 var_2741_axis_0 = const()[name = string("op_2741_axis_0"), val = int32(-1)]; tensor var_2741_cast_fp16_0, tensor var_2741_cast_fp16_1 = split(axis = var_2741_axis_0, split_sizes = var_2741_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_2741_cast_fp16")]; tensor var_2744_cast_fp16 = mul(x = var_2741_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2744_cast_fp16")]; tensor x_89_begin_0 = const()[name = string("x_89_begin_0"), val = tensor([0, 0, 5632])]; tensor x_89_end_0 = const()[name = string("x_89_end_0"), val = tensor([1, 1, 5888])]; tensor x_89_end_mask_0 = const()[name = string("x_89_end_mask_0"), val = tensor([true, true, false])]; tensor x_89 = slice_by_index(begin = x_89_begin_0, end = x_89_end_0, end_mask = x_89_end_mask_0, x = per_layer_proj)[name = string("x_89")]; int32 var_2768 = const()[name = string("op_2768"), val = int32(-1)]; fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2773_cast_fp16 = mul(x = x_89, y = const_47_promoted_to_fp16)[name = string("op_2773_cast_fp16")]; bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; tensor input_47_cast_fp16 = concat(axis = var_2768, interleave = input_47_interleave_0, values = (x_89, var_2773_cast_fp16))[name = string("input_47_cast_fp16")]; tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; fp16 var_2766_to_fp16 = const()[name = string("op_2766_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_2766_to_fp16, x = input_47_cast_fp16)[name = string("normed_89_cast_fp16")]; tensor var_2778_split_sizes_0 = const()[name = string("op_2778_split_sizes_0"), val = tensor([256, 256])]; int32 var_2778_axis_0 = const()[name = string("op_2778_axis_0"), val = int32(-1)]; tensor var_2778_cast_fp16_0, tensor var_2778_cast_fp16_1 = split(axis = var_2778_axis_0, split_sizes = var_2778_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_2778_cast_fp16")]; tensor var_2781_cast_fp16 = mul(x = var_2778_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2781_cast_fp16")]; tensor x_93_begin_0 = const()[name = string("x_93_begin_0"), val = tensor([0, 0, 5888])]; tensor x_93_end_0 = const()[name = string("x_93_end_0"), val = tensor([1, 1, 6144])]; tensor x_93_end_mask_0 = const()[name = string("x_93_end_mask_0"), val = tensor([true, true, false])]; tensor x_93 = slice_by_index(begin = x_93_begin_0, end = x_93_end_0, end_mask = x_93_end_mask_0, x = per_layer_proj)[name = string("x_93")]; int32 var_2805 = const()[name = string("op_2805"), val = int32(-1)]; fp16 const_49_promoted_to_fp16 = const()[name = string("const_49_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2810_cast_fp16 = mul(x = x_93, y = const_49_promoted_to_fp16)[name = string("op_2810_cast_fp16")]; bool input_49_interleave_0 = const()[name = string("input_49_interleave_0"), val = bool(false)]; tensor input_49_cast_fp16 = concat(axis = var_2805, interleave = input_49_interleave_0, values = (x_93, var_2810_cast_fp16))[name = string("input_49_cast_fp16")]; tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; fp16 var_2803_to_fp16 = const()[name = string("op_2803_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_2803_to_fp16, x = input_49_cast_fp16)[name = string("normed_93_cast_fp16")]; tensor var_2815_split_sizes_0 = const()[name = string("op_2815_split_sizes_0"), val = tensor([256, 256])]; int32 var_2815_axis_0 = const()[name = string("op_2815_axis_0"), val = int32(-1)]; tensor var_2815_cast_fp16_0, tensor var_2815_cast_fp16_1 = split(axis = var_2815_axis_0, split_sizes = var_2815_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_2815_cast_fp16")]; tensor var_2818_cast_fp16 = mul(x = var_2815_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2818_cast_fp16")]; tensor x_97_begin_0 = const()[name = string("x_97_begin_0"), val = tensor([0, 0, 6144])]; tensor x_97_end_0 = const()[name = string("x_97_end_0"), val = tensor([1, 1, 6400])]; tensor x_97_end_mask_0 = const()[name = string("x_97_end_mask_0"), val = tensor([true, true, false])]; tensor x_97 = slice_by_index(begin = x_97_begin_0, end = x_97_end_0, end_mask = x_97_end_mask_0, x = per_layer_proj)[name = string("x_97")]; int32 var_2842 = const()[name = string("op_2842"), val = int32(-1)]; fp16 const_51_promoted_to_fp16 = const()[name = string("const_51_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2847_cast_fp16 = mul(x = x_97, y = const_51_promoted_to_fp16)[name = string("op_2847_cast_fp16")]; bool input_51_interleave_0 = const()[name = string("input_51_interleave_0"), val = bool(false)]; tensor input_51_cast_fp16 = concat(axis = var_2842, interleave = input_51_interleave_0, values = (x_97, var_2847_cast_fp16))[name = string("input_51_cast_fp16")]; tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; fp16 var_2840_to_fp16 = const()[name = string("op_2840_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_2840_to_fp16, x = input_51_cast_fp16)[name = string("normed_97_cast_fp16")]; tensor var_2852_split_sizes_0 = const()[name = string("op_2852_split_sizes_0"), val = tensor([256, 256])]; int32 var_2852_axis_0 = const()[name = string("op_2852_axis_0"), val = int32(-1)]; tensor var_2852_cast_fp16_0, tensor var_2852_cast_fp16_1 = split(axis = var_2852_axis_0, split_sizes = var_2852_split_sizes_0, x = normed_97_cast_fp16)[name = string("op_2852_cast_fp16")]; tensor var_2855_cast_fp16 = mul(x = var_2852_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2855_cast_fp16")]; tensor x_101_begin_0 = const()[name = string("x_101_begin_0"), val = tensor([0, 0, 6400])]; tensor x_101_end_0 = const()[name = string("x_101_end_0"), val = tensor([1, 1, 6656])]; tensor x_101_end_mask_0 = const()[name = string("x_101_end_mask_0"), val = tensor([true, true, false])]; tensor x_101 = slice_by_index(begin = x_101_begin_0, end = x_101_end_0, end_mask = x_101_end_mask_0, x = per_layer_proj)[name = string("x_101")]; int32 var_2879 = const()[name = string("op_2879"), val = int32(-1)]; fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2884_cast_fp16 = mul(x = x_101, y = const_53_promoted_to_fp16)[name = string("op_2884_cast_fp16")]; bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; tensor input_53_cast_fp16 = concat(axis = var_2879, interleave = input_53_interleave_0, values = (x_101, var_2884_cast_fp16))[name = string("input_53_cast_fp16")]; tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; fp16 var_2877_to_fp16 = const()[name = string("op_2877_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_2877_to_fp16, x = input_53_cast_fp16)[name = string("normed_101_cast_fp16")]; tensor var_2889_split_sizes_0 = const()[name = string("op_2889_split_sizes_0"), val = tensor([256, 256])]; int32 var_2889_axis_0 = const()[name = string("op_2889_axis_0"), val = int32(-1)]; tensor var_2889_cast_fp16_0, tensor var_2889_cast_fp16_1 = split(axis = var_2889_axis_0, split_sizes = var_2889_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_2889_cast_fp16")]; tensor var_2892_cast_fp16 = mul(x = var_2889_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2892_cast_fp16")]; tensor x_105_begin_0 = const()[name = string("x_105_begin_0"), val = tensor([0, 0, 6656])]; tensor x_105_end_0 = const()[name = string("x_105_end_0"), val = tensor([1, 1, 6912])]; tensor x_105_end_mask_0 = const()[name = string("x_105_end_mask_0"), val = tensor([true, true, false])]; tensor x_105 = slice_by_index(begin = x_105_begin_0, end = x_105_end_0, end_mask = x_105_end_mask_0, x = per_layer_proj)[name = string("x_105")]; int32 var_2916 = const()[name = string("op_2916"), val = int32(-1)]; fp16 const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2921_cast_fp16 = mul(x = x_105, y = const_55_promoted_to_fp16)[name = string("op_2921_cast_fp16")]; bool input_55_interleave_0 = const()[name = string("input_55_interleave_0"), val = bool(false)]; tensor input_55_cast_fp16 = concat(axis = var_2916, interleave = input_55_interleave_0, values = (x_105, var_2921_cast_fp16))[name = string("input_55_cast_fp16")]; tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; fp16 var_2914_to_fp16 = const()[name = string("op_2914_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_2914_to_fp16, x = input_55_cast_fp16)[name = string("normed_105_cast_fp16")]; tensor var_2926_split_sizes_0 = const()[name = string("op_2926_split_sizes_0"), val = tensor([256, 256])]; int32 var_2926_axis_0 = const()[name = string("op_2926_axis_0"), val = int32(-1)]; tensor var_2926_cast_fp16_0, tensor var_2926_cast_fp16_1 = split(axis = var_2926_axis_0, split_sizes = var_2926_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_2926_cast_fp16")]; tensor var_2929_cast_fp16 = mul(x = var_2926_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2929_cast_fp16")]; tensor x_109_begin_0 = const()[name = string("x_109_begin_0"), val = tensor([0, 0, 6912])]; tensor x_109_end_0 = const()[name = string("x_109_end_0"), val = tensor([1, 1, 7168])]; tensor x_109_end_mask_0 = const()[name = string("x_109_end_mask_0"), val = tensor([true, true, false])]; tensor x_109 = slice_by_index(begin = x_109_begin_0, end = x_109_end_0, end_mask = x_109_end_mask_0, x = per_layer_proj)[name = string("x_109")]; int32 var_2953 = const()[name = string("op_2953"), val = int32(-1)]; fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2958_cast_fp16 = mul(x = x_109, y = const_57_promoted_to_fp16)[name = string("op_2958_cast_fp16")]; bool input_57_interleave_0 = const()[name = string("input_57_interleave_0"), val = bool(false)]; tensor input_57_cast_fp16 = concat(axis = var_2953, interleave = input_57_interleave_0, values = (x_109, var_2958_cast_fp16))[name = string("input_57_cast_fp16")]; tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; fp16 var_2951_to_fp16 = const()[name = string("op_2951_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_2951_to_fp16, x = input_57_cast_fp16)[name = string("normed_109_cast_fp16")]; tensor var_2963_split_sizes_0 = const()[name = string("op_2963_split_sizes_0"), val = tensor([256, 256])]; int32 var_2963_axis_0 = const()[name = string("op_2963_axis_0"), val = int32(-1)]; tensor var_2963_cast_fp16_0, tensor var_2963_cast_fp16_1 = split(axis = var_2963_axis_0, split_sizes = var_2963_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_2963_cast_fp16")]; tensor var_2966_cast_fp16 = mul(x = var_2963_cast_fp16_0, y = const_4_to_fp16)[name = string("op_2966_cast_fp16")]; tensor x_113_begin_0 = const()[name = string("x_113_begin_0"), val = tensor([0, 0, 7168])]; tensor x_113_end_0 = const()[name = string("x_113_end_0"), val = tensor([1, 1, 7424])]; tensor x_113_end_mask_0 = const()[name = string("x_113_end_mask_0"), val = tensor([true, true, false])]; tensor x_113 = slice_by_index(begin = x_113_begin_0, end = x_113_end_0, end_mask = x_113_end_mask_0, x = per_layer_proj)[name = string("x_113")]; int32 var_2990 = const()[name = string("op_2990"), val = int32(-1)]; fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2995_cast_fp16 = mul(x = x_113, y = const_59_promoted_to_fp16)[name = string("op_2995_cast_fp16")]; bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; tensor input_59_cast_fp16 = concat(axis = var_2990, interleave = input_59_interleave_0, values = (x_113, var_2995_cast_fp16))[name = string("input_59_cast_fp16")]; tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; fp16 var_2988_to_fp16 = const()[name = string("op_2988_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_2988_to_fp16, x = input_59_cast_fp16)[name = string("normed_113_cast_fp16")]; tensor var_3000_split_sizes_0 = const()[name = string("op_3000_split_sizes_0"), val = tensor([256, 256])]; int32 var_3000_axis_0 = const()[name = string("op_3000_axis_0"), val = int32(-1)]; tensor var_3000_cast_fp16_0, tensor var_3000_cast_fp16_1 = split(axis = var_3000_axis_0, split_sizes = var_3000_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_3000_cast_fp16")]; tensor var_3003_cast_fp16 = mul(x = var_3000_cast_fp16_0, y = const_4_to_fp16)[name = string("op_3003_cast_fp16")]; tensor x_117_begin_0 = const()[name = string("x_117_begin_0"), val = tensor([0, 0, 7424])]; tensor x_117_end_0 = const()[name = string("x_117_end_0"), val = tensor([1, 1, 7680])]; tensor x_117_end_mask_0 = const()[name = string("x_117_end_mask_0"), val = tensor([true, true, false])]; tensor x_117 = slice_by_index(begin = x_117_begin_0, end = x_117_end_0, end_mask = x_117_end_mask_0, x = per_layer_proj)[name = string("x_117")]; int32 var_3027 = const()[name = string("op_3027"), val = int32(-1)]; fp16 const_61_promoted_to_fp16 = const()[name = string("const_61_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3032_cast_fp16 = mul(x = x_117, y = const_61_promoted_to_fp16)[name = string("op_3032_cast_fp16")]; bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; tensor input_61_cast_fp16 = concat(axis = var_3027, interleave = input_61_interleave_0, values = (x_117, var_3032_cast_fp16))[name = string("input_61_cast_fp16")]; tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; fp16 var_3025_to_fp16 = const()[name = string("op_3025_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_3025_to_fp16, x = input_61_cast_fp16)[name = string("normed_117_cast_fp16")]; tensor var_3037_split_sizes_0 = const()[name = string("op_3037_split_sizes_0"), val = tensor([256, 256])]; int32 var_3037_axis_0 = const()[name = string("op_3037_axis_0"), val = int32(-1)]; tensor var_3037_cast_fp16_0, tensor var_3037_cast_fp16_1 = split(axis = var_3037_axis_0, split_sizes = var_3037_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_3037_cast_fp16")]; tensor var_3040_cast_fp16 = mul(x = var_3037_cast_fp16_0, y = const_4_to_fp16)[name = string("op_3040_cast_fp16")]; tensor x_121_begin_0 = const()[name = string("x_121_begin_0"), val = tensor([0, 0, 7680])]; tensor x_121_end_0 = const()[name = string("x_121_end_0"), val = tensor([1, 1, 7936])]; tensor x_121_end_mask_0 = const()[name = string("x_121_end_mask_0"), val = tensor([true, true, false])]; tensor x_121 = slice_by_index(begin = x_121_begin_0, end = x_121_end_0, end_mask = x_121_end_mask_0, x = per_layer_proj)[name = string("x_121")]; int32 var_3064 = const()[name = string("op_3064"), val = int32(-1)]; fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3069_cast_fp16 = mul(x = x_121, y = const_63_promoted_to_fp16)[name = string("op_3069_cast_fp16")]; bool input_63_interleave_0 = const()[name = string("input_63_interleave_0"), val = bool(false)]; tensor input_63_cast_fp16 = concat(axis = var_3064, interleave = input_63_interleave_0, values = (x_121, var_3069_cast_fp16))[name = string("input_63_cast_fp16")]; tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; fp16 var_3062_to_fp16 = const()[name = string("op_3062_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_3062_to_fp16, x = input_63_cast_fp16)[name = string("normed_121_cast_fp16")]; tensor var_3074_split_sizes_0 = const()[name = string("op_3074_split_sizes_0"), val = tensor([256, 256])]; int32 var_3074_axis_0 = const()[name = string("op_3074_axis_0"), val = int32(-1)]; tensor var_3074_cast_fp16_0, tensor var_3074_cast_fp16_1 = split(axis = var_3074_axis_0, split_sizes = var_3074_split_sizes_0, x = normed_121_cast_fp16)[name = string("op_3074_cast_fp16")]; tensor var_3077_cast_fp16 = mul(x = var_3074_cast_fp16_0, y = const_4_to_fp16)[name = string("op_3077_cast_fp16")]; tensor x_125_begin_0 = const()[name = string("x_125_begin_0"), val = tensor([0, 0, 7936])]; tensor x_125_end_0 = const()[name = string("x_125_end_0"), val = tensor([1, 1, 8192])]; tensor x_125_end_mask_0 = const()[name = string("x_125_end_mask_0"), val = tensor([true, true, false])]; tensor x_125 = slice_by_index(begin = x_125_begin_0, end = x_125_end_0, end_mask = x_125_end_mask_0, x = per_layer_proj)[name = string("x_125")]; int32 var_3101 = const()[name = string("op_3101"), val = int32(-1)]; fp16 const_65_promoted_to_fp16 = const()[name = string("const_65_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3106_cast_fp16 = mul(x = x_125, y = const_65_promoted_to_fp16)[name = string("op_3106_cast_fp16")]; bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; tensor input_65_cast_fp16 = concat(axis = var_3101, interleave = input_65_interleave_0, values = (x_125, var_3106_cast_fp16))[name = string("input_65_cast_fp16")]; tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; fp16 var_3099_to_fp16 = const()[name = string("op_3099_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_3099_to_fp16, x = input_65_cast_fp16)[name = string("normed_125_cast_fp16")]; tensor var_3111_split_sizes_0 = const()[name = string("op_3111_split_sizes_0"), val = tensor([256, 256])]; int32 var_3111_axis_0 = const()[name = string("op_3111_axis_0"), val = int32(-1)]; tensor var_3111_cast_fp16_0, tensor var_3111_cast_fp16_1 = split(axis = var_3111_axis_0, split_sizes = var_3111_split_sizes_0, x = normed_125_cast_fp16)[name = string("op_3111_cast_fp16")]; tensor var_3114_cast_fp16 = mul(x = var_3111_cast_fp16_0, y = const_4_to_fp16)[name = string("op_3114_cast_fp16")]; tensor x_129_begin_0 = const()[name = string("x_129_begin_0"), val = tensor([0, 0, 8192])]; tensor x_129_end_0 = const()[name = string("x_129_end_0"), val = tensor([1, 1, 8448])]; tensor x_129_end_mask_0 = const()[name = string("x_129_end_mask_0"), val = tensor([true, true, false])]; tensor x_129 = slice_by_index(begin = x_129_begin_0, end = x_129_end_0, end_mask = x_129_end_mask_0, x = per_layer_proj)[name = string("x_129")]; int32 var_3138 = const()[name = string("op_3138"), val = int32(-1)]; fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3143_cast_fp16 = mul(x = x_129, y = const_67_promoted_to_fp16)[name = string("op_3143_cast_fp16")]; bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)]; tensor input_67_cast_fp16 = concat(axis = var_3138, interleave = input_67_interleave_0, values = (x_129, var_3143_cast_fp16))[name = string("input_67_cast_fp16")]; tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; fp16 var_3136_to_fp16 = const()[name = string("op_3136_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_3136_to_fp16, x = input_67_cast_fp16)[name = string("normed_129_cast_fp16")]; tensor var_3148_split_sizes_0 = const()[name = string("op_3148_split_sizes_0"), val = tensor([256, 256])]; int32 var_3148_axis_0 = const()[name = string("op_3148_axis_0"), val = int32(-1)]; tensor var_3148_cast_fp16_0, tensor var_3148_cast_fp16_1 = split(axis = var_3148_axis_0, split_sizes = var_3148_split_sizes_0, x = normed_129_cast_fp16)[name = string("op_3148_cast_fp16")]; tensor var_3151_cast_fp16 = mul(x = var_3148_cast_fp16_0, y = const_4_to_fp16)[name = string("op_3151_cast_fp16")]; tensor x_133_begin_0 = const()[name = string("x_133_begin_0"), val = tensor([0, 0, 8448])]; tensor x_133_end_0 = const()[name = string("x_133_end_0"), val = tensor([1, 1, 8704])]; tensor x_133_end_mask_0 = const()[name = string("x_133_end_mask_0"), val = tensor([true, true, false])]; tensor x_133 = slice_by_index(begin = x_133_begin_0, end = x_133_end_0, end_mask = x_133_end_mask_0, x = per_layer_proj)[name = string("x_133")]; int32 var_3175 = const()[name = string("op_3175"), val = int32(-1)]; fp16 const_69_promoted_to_fp16 = const()[name = string("const_69_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3180_cast_fp16 = mul(x = x_133, y = const_69_promoted_to_fp16)[name = string("op_3180_cast_fp16")]; bool input_69_interleave_0 = const()[name = string("input_69_interleave_0"), val = bool(false)]; tensor input_69_cast_fp16 = concat(axis = var_3175, interleave = input_69_interleave_0, values = (x_133, var_3180_cast_fp16))[name = string("input_69_cast_fp16")]; tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; fp16 var_3173_to_fp16 = const()[name = string("op_3173_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_3173_to_fp16, x = input_69_cast_fp16)[name = string("normed_133_cast_fp16")]; tensor var_3185_split_sizes_0 = const()[name = string("op_3185_split_sizes_0"), val = tensor([256, 256])]; int32 var_3185_axis_0 = const()[name = string("op_3185_axis_0"), val = int32(-1)]; tensor var_3185_cast_fp16_0, tensor var_3185_cast_fp16_1 = split(axis = var_3185_axis_0, split_sizes = var_3185_split_sizes_0, x = normed_133_cast_fp16)[name = string("op_3185_cast_fp16")]; tensor var_3188_cast_fp16 = mul(x = var_3185_cast_fp16_0, y = const_4_to_fp16)[name = string("op_3188_cast_fp16")]; tensor x_137_begin_0 = const()[name = string("x_137_begin_0"), val = tensor([0, 0, 8704])]; tensor x_137_end_0 = const()[name = string("x_137_end_0"), val = tensor([1, 1, 1])]; tensor x_137_end_mask_0 = const()[name = string("x_137_end_mask_0"), val = tensor([true, true, true])]; tensor x_137 = slice_by_index(begin = x_137_begin_0, end = x_137_end_0, end_mask = x_137_end_mask_0, x = per_layer_proj)[name = string("x_137")]; int32 var_3212 = const()[name = string("op_3212"), val = int32(-1)]; fp16 const_71_promoted_to_fp16 = const()[name = string("const_71_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3217_cast_fp16 = mul(x = x_137, y = const_71_promoted_to_fp16)[name = string("op_3217_cast_fp16")]; bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; tensor input_71_cast_fp16 = concat(axis = var_3212, interleave = input_71_interleave_0, values = (x_137, var_3217_cast_fp16))[name = string("input_71_cast_fp16")]; tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; fp16 var_3210_to_fp16 = const()[name = string("op_3210_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_3210_to_fp16, x = input_71_cast_fp16)[name = string("normed_137_cast_fp16")]; tensor var_3222_split_sizes_0 = const()[name = string("op_3222_split_sizes_0"), val = tensor([256, 256])]; int32 var_3222_axis_0 = const()[name = string("op_3222_axis_0"), val = int32(-1)]; tensor var_3222_cast_fp16_0, tensor var_3222_cast_fp16_1 = split(axis = var_3222_axis_0, split_sizes = var_3222_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_3222_cast_fp16")]; tensor var_3225_cast_fp16 = mul(x = var_3222_cast_fp16_0, y = const_4_to_fp16)[name = string("op_3225_cast_fp16")]; int32 var_3228 = const()[name = string("op_3228"), val = int32(-1)]; bool per_layer_proj_normed_interleave_0 = const()[name = string("per_layer_proj_normed_interleave_0"), val = bool(false)]; tensor per_layer_proj_normed = concat(axis = var_3228, interleave = per_layer_proj_normed_interleave_0, values = (var_1967_cast_fp16, var_2004_cast_fp16, var_2041_cast_fp16, var_2078_cast_fp16, var_2115_cast_fp16, var_2152_cast_fp16, var_2189_cast_fp16, var_2226_cast_fp16, var_2263_cast_fp16, var_2300_cast_fp16, var_2337_cast_fp16, var_2374_cast_fp16, var_2411_cast_fp16, var_2448_cast_fp16, var_2485_cast_fp16, var_2522_cast_fp16, var_2559_cast_fp16, var_2596_cast_fp16, var_2633_cast_fp16, var_2670_cast_fp16, var_2707_cast_fp16, var_2744_cast_fp16, var_2781_cast_fp16, var_2818_cast_fp16, var_2855_cast_fp16, var_2892_cast_fp16, var_2929_cast_fp16, var_2966_cast_fp16, var_3003_cast_fp16, var_3040_cast_fp16, var_3077_cast_fp16, var_3114_cast_fp16, var_3151_cast_fp16, var_3188_cast_fp16, var_3225_cast_fp16))[name = string("per_layer_proj_normed")]; tensor var_3231 = add(x = per_layer_proj_normed, y = per_layer_raw)[name = string("op_3231")]; fp16 const_73 = const()[name = string("const_73"), val = fp16(0x1.6ap-1)]; tensor per_layer_combined = mul(x = var_3231, y = const_73)[name = string("per_layer_combined")]; int32 var_3241 = const()[name = string("op_3241"), val = int32(0)]; int32 var_3242_batch_dims_0 = const()[name = string("op_3242_batch_dims_0"), val = int32(0)]; bool var_3242_validate_indices_0 = const()[name = string("op_3242_validate_indices_0"), val = bool(false)]; string position_ids_to_uint16_dtype_0 = const()[name = string("position_ids_to_uint16_dtype_0"), val = string("uint16")]; tensor position_ids_to_uint16 = cast(dtype = position_ids_to_uint16_dtype_0, x = position_ids)[name = string("cast_0")]; tensor var_3242_cast_uint16 = gather(axis = var_3241, batch_dims = var_3242_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_3242_validate_indices_0, x = cos_sliding_palettized)[name = string("op_3242_cast_uint16")]; tensor var_3244_axes_0 = const()[name = string("op_3244_axes_0"), val = tensor([0])]; tensor var_3244 = expand_dims(axes = var_3244_axes_0, x = var_3242_cast_uint16)[name = string("op_3244")]; tensor cos_1_axes_0 = const()[name = string("cos_1_axes_0"), val = tensor([0])]; tensor cos_1 = expand_dims(axes = cos_1_axes_0, x = var_3244)[name = string("cos_1")]; int32 var_3247 = const()[name = string("op_3247"), val = int32(0)]; int32 var_3248_batch_dims_0 = const()[name = string("op_3248_batch_dims_0"), val = int32(0)]; bool var_3248_validate_indices_0 = const()[name = string("op_3248_validate_indices_0"), val = bool(false)]; tensor var_3248_cast_uint16 = gather(axis = var_3247, batch_dims = var_3248_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_3248_validate_indices_0, x = sin_sliding_palettized)[name = string("op_3248_cast_uint16")]; tensor var_3250_axes_0 = const()[name = string("op_3250_axes_0"), val = tensor([0])]; tensor var_3250 = expand_dims(axes = var_3250_axes_0, x = var_3248_cast_uint16)[name = string("op_3250")]; tensor sin_1_axes_0 = const()[name = string("sin_1_axes_0"), val = tensor([0])]; tensor sin_1 = expand_dims(axes = sin_1_axes_0, x = var_3250)[name = string("sin_1")]; int32 var_3253 = const()[name = string("op_3253"), val = int32(0)]; int32 var_3254_batch_dims_0 = const()[name = string("op_3254_batch_dims_0"), val = int32(0)]; bool var_3254_validate_indices_0 = const()[name = string("op_3254_validate_indices_0"), val = bool(false)]; tensor var_3254_cast_uint16 = gather(axis = var_3253, batch_dims = var_3254_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_3254_validate_indices_0, x = cos_full_palettized)[name = string("op_3254_cast_uint16")]; tensor var_3256_axes_0 = const()[name = string("op_3256_axes_0"), val = tensor([0])]; tensor var_3256 = expand_dims(axes = var_3256_axes_0, x = var_3254_cast_uint16)[name = string("op_3256")]; tensor cos_axes_0 = const()[name = string("cos_axes_0"), val = tensor([0])]; tensor cos = expand_dims(axes = cos_axes_0, x = var_3256)[name = string("cos")]; int32 var_3259 = const()[name = string("op_3259"), val = int32(0)]; int32 var_3260_batch_dims_0 = const()[name = string("op_3260_batch_dims_0"), val = int32(0)]; bool var_3260_validate_indices_0 = const()[name = string("op_3260_validate_indices_0"), val = bool(false)]; tensor var_3260_cast_uint16 = gather(axis = var_3259, batch_dims = var_3260_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_3260_validate_indices_0, x = sin_full_palettized)[name = string("op_3260_cast_uint16")]; tensor var_3262_axes_0 = const()[name = string("op_3262_axes_0"), val = tensor([0])]; tensor var_3262 = expand_dims(axes = var_3262_axes_0, x = var_3260_cast_uint16)[name = string("op_3262")]; tensor sin_axes_0 = const()[name = string("sin_axes_0"), val = tensor([0])]; tensor sin = expand_dims(axes = sin_axes_0, x = var_3262)[name = string("sin")]; int32 var_3269 = const()[name = string("op_3269"), val = int32(-1)]; fp16 const_74_promoted_to_fp16 = const()[name = string("const_74_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3275_cast_fp16 = mul(x = hidden_states_3, y = const_74_promoted_to_fp16)[name = string("op_3275_cast_fp16")]; bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; tensor input_73_cast_fp16 = concat(axis = var_3269, interleave = input_73_interleave_0, values = (hidden_states_3, var_3275_cast_fp16))[name = string("input_73_cast_fp16")]; tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; fp16 var_3267_to_fp16 = const()[name = string("op_3267_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_3267_to_fp16, x = input_73_cast_fp16)[name = string("normed_141_cast_fp16")]; tensor var_3280_split_sizes_0 = const()[name = string("op_3280_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_3280_axis_0 = const()[name = string("op_3280_axis_0"), val = int32(-1)]; tensor var_3280_cast_fp16_0, tensor var_3280_cast_fp16_1 = split(axis = var_3280_axis_0, split_sizes = var_3280_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_3280_cast_fp16")]; tensor const_75_to_fp16 = const()[name = string("const_75_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2250342976)))]; tensor var_3283_cast_fp16 = mul(x = var_3280_cast_fp16_0, y = const_75_to_fp16)[name = string("op_3283_cast_fp16")]; tensor var_3288 = const()[name = string("op_3288"), val = tensor([0, 2, 1])]; tensor var_3291_axes_0 = const()[name = string("op_3291_axes_0"), val = tensor([2])]; tensor var_3289 = transpose(perm = var_3288, x = var_3283_cast_fp16)[name = string("transpose_366")]; tensor var_3291 = expand_dims(axes = var_3291_axes_0, x = var_3289)[name = string("op_3291")]; string var_3307_pad_type_0 = const()[name = string("op_3307_pad_type_0"), val = string("valid")]; tensor var_3307_strides_0 = const()[name = string("op_3307_strides_0"), val = tensor([1, 1])]; tensor var_3307_pad_0 = const()[name = string("op_3307_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3307_dilations_0 = const()[name = string("op_3307_dilations_0"), val = tensor([1, 1])]; int32 var_3307_groups_0 = const()[name = string("op_3307_groups_0"), val = int32(1)]; tensor var_3307 = conv(dilations = var_3307_dilations_0, groups = var_3307_groups_0, pad = var_3307_pad_0, pad_type = var_3307_pad_type_0, strides = var_3307_strides_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = var_3291)[name = string("op_3307")]; tensor var_3312 = const()[name = string("op_3312"), val = tensor([1, 8, 256, 1])]; tensor var_3313 = reshape(shape = var_3312, x = var_3307)[name = string("op_3313")]; tensor var_3318 = const()[name = string("op_3318"), val = tensor([0, 1, 3, 2])]; tensor var_3328 = const()[name = string("op_3328"), val = tensor([1, 8, 256])]; tensor var_3319 = transpose(perm = var_3318, x = var_3313)[name = string("transpose_365")]; tensor x_143 = reshape(shape = var_3328, x = var_3319)[name = string("x_143")]; int32 var_3334 = const()[name = string("op_3334"), val = int32(-1)]; fp16 const_76_promoted_to_fp16 = const()[name = string("const_76_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3340_cast_fp16 = mul(x = x_143, y = const_76_promoted_to_fp16)[name = string("op_3340_cast_fp16")]; bool input_77_interleave_0 = const()[name = string("input_77_interleave_0"), val = bool(false)]; tensor input_77_cast_fp16 = concat(axis = var_3334, interleave = input_77_interleave_0, values = (x_143, var_3340_cast_fp16))[name = string("input_77_cast_fp16")]; tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; fp16 var_3332_to_fp16 = const()[name = string("op_3332_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_3332_to_fp16, x = input_77_cast_fp16)[name = string("normed_145_cast_fp16")]; tensor var_3345_split_sizes_0 = const()[name = string("op_3345_split_sizes_0"), val = tensor([256, 256])]; int32 var_3345_axis_0 = const()[name = string("op_3345_axis_0"), val = int32(-1)]; tensor var_3345_cast_fp16_0, tensor var_3345_cast_fp16_1 = split(axis = var_3345_axis_0, split_sizes = var_3345_split_sizes_0, x = normed_145_cast_fp16)[name = string("op_3345_cast_fp16")]; tensor const_77_to_fp16 = const()[name = string("const_77_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2250346112)))]; tensor var_3348_cast_fp16 = mul(x = var_3345_cast_fp16_0, y = const_77_to_fp16)[name = string("op_3348_cast_fp16")]; tensor var_3354 = const()[name = string("op_3354"), val = tensor([1, 8, 1, 256])]; tensor q_3 = reshape(shape = var_3354, x = var_3348_cast_fp16)[name = string("q_3")]; tensor var_3356 = mul(x = q_3, y = cos_1)[name = string("op_3356")]; tensor var_3357_split_sizes_0 = const()[name = string("op_3357_split_sizes_0"), val = tensor([128, 128])]; int32 var_3357_axis_0 = const()[name = string("op_3357_axis_0"), val = int32(-1)]; tensor var_3357_0, tensor var_3357_1 = split(axis = var_3357_axis_0, split_sizes = var_3357_split_sizes_0, x = q_3)[name = string("op_3357")]; fp16 const_78_promoted = const()[name = string("const_78_promoted"), val = fp16(-0x1p+0)]; tensor var_3359 = mul(x = var_3357_1, y = const_78_promoted)[name = string("op_3359")]; int32 var_3361 = const()[name = string("op_3361"), val = int32(-1)]; bool var_3362_interleave_0 = const()[name = string("op_3362_interleave_0"), val = bool(false)]; tensor var_3362 = concat(axis = var_3361, interleave = var_3362_interleave_0, values = (var_3359, var_3357_0))[name = string("op_3362")]; tensor var_3363 = mul(x = var_3362, y = sin_1)[name = string("op_3363")]; tensor q_7 = add(x = var_3356, y = var_3363)[name = string("q_7")]; string var_3376_pad_type_0 = const()[name = string("op_3376_pad_type_0"), val = string("valid")]; tensor var_3376_strides_0 = const()[name = string("op_3376_strides_0"), val = tensor([1, 1])]; tensor var_3376_pad_0 = const()[name = string("op_3376_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3376_dilations_0 = const()[name = string("op_3376_dilations_0"), val = tensor([1, 1])]; int32 var_3376_groups_0 = const()[name = string("op_3376_groups_0"), val = int32(1)]; tensor var_3376 = conv(dilations = var_3376_dilations_0, groups = var_3376_groups_0, pad = var_3376_pad_0, pad_type = var_3376_pad_type_0, strides = var_3376_strides_0, weight = layers_0_self_attn_k_proj_weight_palettized, x = var_3291)[name = string("op_3376")]; tensor var_3381 = const()[name = string("op_3381"), val = tensor([1, 1, 256, 1])]; tensor var_3382 = reshape(shape = var_3381, x = var_3376)[name = string("op_3382")]; tensor var_3387 = const()[name = string("op_3387"), val = tensor([0, 1, 3, 2])]; string var_3404_pad_type_0 = const()[name = string("op_3404_pad_type_0"), val = string("valid")]; tensor var_3404_strides_0 = const()[name = string("op_3404_strides_0"), val = tensor([1, 1])]; tensor var_3404_pad_0 = const()[name = string("op_3404_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3404_dilations_0 = const()[name = string("op_3404_dilations_0"), val = tensor([1, 1])]; int32 var_3404_groups_0 = const()[name = string("op_3404_groups_0"), val = int32(1)]; tensor var_3404 = conv(dilations = var_3404_dilations_0, groups = var_3404_groups_0, pad = var_3404_pad_0, pad_type = var_3404_pad_type_0, strides = var_3404_strides_0, weight = layers_0_self_attn_v_proj_weight_palettized, x = var_3291)[name = string("op_3404")]; tensor var_3409 = const()[name = string("op_3409"), val = tensor([1, 1, 256, 1])]; tensor var_3410 = reshape(shape = var_3409, x = var_3404)[name = string("op_3410")]; tensor var_3415 = const()[name = string("op_3415"), val = tensor([0, 1, 3, 2])]; tensor var_3425 = const()[name = string("op_3425"), val = tensor([1, 1, 256])]; tensor var_3388 = transpose(perm = var_3387, x = var_3382)[name = string("transpose_364")]; tensor x_147 = reshape(shape = var_3425, x = var_3388)[name = string("x_147")]; int32 var_3431 = const()[name = string("op_3431"), val = int32(-1)]; fp16 const_79_promoted_to_fp16 = const()[name = string("const_79_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3437_cast_fp16 = mul(x = x_147, y = const_79_promoted_to_fp16)[name = string("op_3437_cast_fp16")]; bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; tensor input_79_cast_fp16 = concat(axis = var_3431, interleave = input_79_interleave_0, values = (x_147, var_3437_cast_fp16))[name = string("input_79_cast_fp16")]; tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; fp16 var_3429_to_fp16 = const()[name = string("op_3429_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_3429_to_fp16, x = input_79_cast_fp16)[name = string("normed_149_cast_fp16")]; tensor var_3442_split_sizes_0 = const()[name = string("op_3442_split_sizes_0"), val = tensor([256, 256])]; int32 var_3442_axis_0 = const()[name = string("op_3442_axis_0"), val = int32(-1)]; tensor var_3442_cast_fp16_0, tensor var_3442_cast_fp16_1 = split(axis = var_3442_axis_0, split_sizes = var_3442_split_sizes_0, x = normed_149_cast_fp16)[name = string("op_3442_cast_fp16")]; tensor const_80_to_fp16 = const()[name = string("const_80_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2250346688)))]; tensor var_3445_cast_fp16 = mul(x = var_3442_cast_fp16_0, y = const_80_to_fp16)[name = string("op_3445_cast_fp16")]; tensor var_3451 = const()[name = string("op_3451"), val = tensor([1, 1, 1, 256])]; tensor q_5 = reshape(shape = var_3451, x = var_3445_cast_fp16)[name = string("q_5")]; fp16 var_3458_promoted_to_fp16 = const()[name = string("op_3458_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3416 = transpose(perm = var_3415, x = var_3410)[name = string("transpose_363")]; tensor var_3459_cast_fp16 = pow(x = var_3416, y = var_3458_promoted_to_fp16)[name = string("op_3459_cast_fp16")]; tensor var_3464_axes_0 = const()[name = string("op_3464_axes_0"), val = tensor([-1])]; bool var_3464_keep_dims_0 = const()[name = string("op_3464_keep_dims_0"), val = bool(true)]; tensor var_3464_cast_fp16 = reduce_mean(axes = var_3464_axes_0, keep_dims = var_3464_keep_dims_0, x = var_3459_cast_fp16)[name = string("op_3464_cast_fp16")]; fp16 var_3466_to_fp16 = const()[name = string("op_3466_to_fp16"), val = fp16(0x1.1p-20)]; tensor mean_sq_1_cast_fp16 = add(x = var_3464_cast_fp16, y = var_3466_to_fp16)[name = string("mean_sq_1_cast_fp16")]; fp16 var_3473_to_fp16 = const()[name = string("op_3473_to_fp16"), val = fp16(-0x1p-1)]; tensor var_3474_cast_fp16 = pow(x = mean_sq_1_cast_fp16, y = var_3473_to_fp16)[name = string("op_3474_cast_fp16")]; tensor var_3475_cast_fp16 = mul(x = var_3416, y = var_3474_cast_fp16)[name = string("op_3475_cast_fp16")]; tensor var_3481 = mul(x = q_5, y = cos_1)[name = string("op_3481")]; tensor var_3482_split_sizes_0 = const()[name = string("op_3482_split_sizes_0"), val = tensor([128, 128])]; int32 var_3482_axis_0 = const()[name = string("op_3482_axis_0"), val = int32(-1)]; tensor var_3482_0, tensor var_3482_1 = split(axis = var_3482_axis_0, split_sizes = var_3482_split_sizes_0, x = q_5)[name = string("op_3482")]; fp16 const_81_promoted = const()[name = string("const_81_promoted"), val = fp16(-0x1p+0)]; tensor var_3484 = mul(x = var_3482_1, y = const_81_promoted)[name = string("op_3484")]; int32 var_3486 = const()[name = string("op_3486"), val = int32(-1)]; bool var_3487_interleave_0 = const()[name = string("op_3487_interleave_0"), val = bool(false)]; tensor var_3487 = concat(axis = var_3486, interleave = var_3487_interleave_0, values = (var_3484, var_3482_0))[name = string("op_3487")]; tensor var_3488 = mul(x = var_3487, y = sin_1)[name = string("op_3488")]; tensor input_81 = add(x = var_3481, y = var_3488)[name = string("input_81")]; tensor read_state_0 = read_state(input = kv_cache_0)[name = string("read_state_0")]; tensor var_3493_begin_0 = const()[name = string("op_3493_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3493_end_0 = const()[name = string("op_3493_end_0"), val = tensor([1, 1, 512, 512])]; tensor var_3493_end_mask_0 = const()[name = string("op_3493_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3493_squeeze_mask_0 = const()[name = string("op_3493_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_3493_cast_fp16 = slice_by_index(begin = var_3493_begin_0, end = var_3493_end_0, end_mask = var_3493_end_mask_0, squeeze_mask = var_3493_squeeze_mask_0, x = read_state_0)[name = string("op_3493_cast_fp16")]; tensor K_cache_1_axes_0 = const()[name = string("K_cache_1_axes_0"), val = tensor([0])]; tensor K_cache_1_cast_fp16 = expand_dims(axes = K_cache_1_axes_0, x = var_3493_cast_fp16)[name = string("K_cache_1_cast_fp16")]; tensor var_3498_begin_0 = const()[name = string("op_3498_begin_0"), val = tensor([35, 0, 0, 0])]; tensor var_3498_end_0 = const()[name = string("op_3498_end_0"), val = tensor([36, 1, 512, 512])]; tensor var_3498_end_mask_0 = const()[name = string("op_3498_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_3498_squeeze_mask_0 = const()[name = string("op_3498_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_3498_cast_fp16 = slice_by_index(begin = var_3498_begin_0, end = var_3498_end_0, end_mask = var_3498_end_mask_0, squeeze_mask = var_3498_squeeze_mask_0, x = read_state_0)[name = string("op_3498_cast_fp16")]; tensor V_cache_1_axes_0 = const()[name = string("V_cache_1_axes_0"), val = tensor([0])]; tensor V_cache_1_cast_fp16 = expand_dims(axes = V_cache_1_axes_0, x = var_3498_cast_fp16)[name = string("V_cache_1_cast_fp16")]; tensor k_padded_1_pad_0 = const()[name = string("k_padded_1_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string k_padded_1_mode_0 = const()[name = string("k_padded_1_mode_0"), val = string("constant")]; fp16 const_82_to_fp16 = const()[name = string("const_82_to_fp16"), val = fp16(0x0p+0)]; tensor k_padded_1_cast_fp16 = pad(constant_val = const_82_to_fp16, mode = k_padded_1_mode_0, pad = k_padded_1_pad_0, x = input_81)[name = string("k_padded_1_cast_fp16")]; tensor v_padded_1_pad_0 = const()[name = string("v_padded_1_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string v_padded_1_mode_0 = const()[name = string("v_padded_1_mode_0"), val = string("constant")]; fp16 const_83_to_fp16 = const()[name = string("const_83_to_fp16"), val = fp16(0x0p+0)]; tensor v_padded_1_cast_fp16 = pad(constant_val = const_83_to_fp16, mode = v_padded_1_mode_0, pad = v_padded_1_pad_0, x = var_3475_cast_fp16)[name = string("v_padded_1_cast_fp16")]; fp16 var_3513_promoted_to_fp16 = const()[name = string("op_3513_promoted_to_fp16"), val = fp16(0x1p+0)]; tensor var_3515_cast_fp16 = sub(x = var_3513_promoted_to_fp16, y = update_mask)[name = string("op_3515_cast_fp16")]; tensor var_3516_cast_fp16 = mul(x = K_cache_1_cast_fp16, y = var_3515_cast_fp16)[name = string("op_3516_cast_fp16")]; tensor var_3517_reps_0 = const()[name = string("op_3517_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_3517_cast_fp16 = tile(reps = var_3517_reps_0, x = k_padded_1_cast_fp16)[name = string("op_3517_cast_fp16")]; tensor var_3518_cast_fp16 = mul(x = var_3517_cast_fp16, y = update_mask)[name = string("op_3518_cast_fp16")]; tensor K_new_1_cast_fp16 = add(x = var_3516_cast_fp16, y = var_3518_cast_fp16)[name = string("K_new_1_cast_fp16")]; tensor var_3524_cast_fp16 = mul(x = V_cache_1_cast_fp16, y = var_3515_cast_fp16)[name = string("op_3524_cast_fp16")]; tensor var_3525_reps_0 = const()[name = string("op_3525_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_3525_cast_fp16 = tile(reps = var_3525_reps_0, x = v_padded_1_cast_fp16)[name = string("op_3525_cast_fp16")]; tensor var_3526_cast_fp16 = mul(x = var_3525_cast_fp16, y = update_mask)[name = string("op_3526_cast_fp16")]; tensor V_new_1_cast_fp16 = add(x = var_3524_cast_fp16, y = var_3526_cast_fp16)[name = string("V_new_1_cast_fp16")]; tensor var_3530_axes_0 = const()[name = string("op_3530_axes_0"), val = tensor([0])]; tensor var_3530_cast_fp16 = squeeze(axes = var_3530_axes_0, x = K_new_1_cast_fp16)[name = string("op_3530_cast_fp16")]; tensor concat_0 = const()[name = string("concat_0"), val = tensor([0, 0, 0, 0])]; tensor concat_1 = const()[name = string("concat_1"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_1_stride_0, update = var_3530_cast_fp16, x = read_state_0)[name = string("kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_1_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_30_write_state")]; tensor coreml_update_state_30 = read_state(input = kv_cache_0)[name = string("coreml_update_state_30")]; tensor var_3537_axes_0 = const()[name = string("op_3537_axes_0"), val = tensor([0])]; tensor var_3537_cast_fp16 = squeeze(axes = var_3537_axes_0, x = V_new_1_cast_fp16)[name = string("op_3537_cast_fp16")]; tensor concat_2 = const()[name = string("concat_2"), val = tensor([35, 0, 0, 0])]; tensor concat_3 = const()[name = string("concat_3"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_2, begin_mask = kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_3, end_mask = kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_2_stride_0, update = var_3537_cast_fp16, x = coreml_update_state_30)[name = string("kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_2_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_31_write_state")]; tensor coreml_update_state_31 = read_state(input = kv_cache_0)[name = string("coreml_update_state_31")]; tensor K_for_attn_1_begin_0 = const()[name = string("K_for_attn_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor K_for_attn_1_end_0 = const()[name = string("K_for_attn_1_end_0"), val = tensor([1, 1, 512, 256])]; tensor K_for_attn_1_end_mask_0 = const()[name = string("K_for_attn_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor K_for_attn_1_cast_fp16 = slice_by_index(begin = K_for_attn_1_begin_0, end = K_for_attn_1_end_0, end_mask = K_for_attn_1_end_mask_0, x = K_new_1_cast_fp16)[name = string("K_for_attn_1_cast_fp16")]; tensor V_for_attn_1_begin_0 = const()[name = string("V_for_attn_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor V_for_attn_1_end_0 = const()[name = string("V_for_attn_1_end_0"), val = tensor([1, 1, 512, 256])]; tensor V_for_attn_1_end_mask_0 = const()[name = string("V_for_attn_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor V_for_attn_1_cast_fp16 = slice_by_index(begin = V_for_attn_1_begin_0, end = V_for_attn_1_end_0, end_mask = V_for_attn_1_end_mask_0, x = V_new_1_cast_fp16)[name = string("V_for_attn_1_cast_fp16")]; tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_0_reps_0 = const()[name = string("tile_0_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = K_for_attn_1_cast_fp16)[name = string("transpose_362")]; tensor tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = string("tile_0_cast_fp16")]; tensor concat_4 = const()[name = string("concat_4"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_0_cast_fp16 = reshape(shape = concat_4, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")]; tensor transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_5 = const()[name = string("concat_5"), val = tensor([-1, 1, 512, 256])]; tensor transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_361")]; tensor reshape_1_cast_fp16 = reshape(shape = concat_5, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")]; tensor transpose_140_perm_0 = const()[name = string("transpose_140_perm_0"), val = tensor([1, 0, -1, -2])]; tensor transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = V_for_attn_1_cast_fp16)[name = string("transpose_360")]; tensor tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")]; tensor concat_6 = const()[name = string("concat_6"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_2_cast_fp16 = reshape(shape = concat_6, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")]; tensor transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_7 = const()[name = string("concat_7"), val = tensor([-1, 1, 512, 256])]; tensor transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_359")]; tensor reshape_3_cast_fp16 = reshape(shape = concat_7, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")]; tensor V_expanded_1_perm_0 = const()[name = string("V_expanded_1_perm_0"), val = tensor([1, 0, -2, -1])]; bool var_3574_transpose_x_0 = const()[name = string("op_3574_transpose_x_0"), val = bool(false)]; bool var_3574_transpose_y_0 = const()[name = string("op_3574_transpose_y_0"), val = bool(false)]; tensor transpose_140_cast_fp16 = transpose(perm = transpose_140_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_358")]; tensor var_3574_cast_fp16 = matmul(transpose_x = var_3574_transpose_x_0, transpose_y = var_3574_transpose_y_0, x = q_7, y = transpose_140_cast_fp16)[name = string("op_3574_cast_fp16")]; tensor attn_weights_3_cast_fp16 = add(x = var_3574_cast_fp16, y = causal_mask)[name = string("attn_weights_3_cast_fp16")]; int32 var_3584 = const()[name = string("op_3584"), val = int32(-1)]; tensor var_3586_cast_fp16 = softmax(axis = var_3584, x = attn_weights_3_cast_fp16)[name = string("op_3586_cast_fp16")]; bool var_3602_transpose_x_0 = const()[name = string("op_3602_transpose_x_0"), val = bool(false)]; bool var_3602_transpose_y_0 = const()[name = string("op_3602_transpose_y_0"), val = bool(false)]; tensor V_expanded_1_cast_fp16 = transpose(perm = V_expanded_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_357")]; tensor var_3602_cast_fp16 = matmul(transpose_x = var_3602_transpose_x_0, transpose_y = var_3602_transpose_y_0, x = var_3586_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("op_3602_cast_fp16")]; tensor var_3612 = const()[name = string("op_3612"), val = tensor([0, 2, 1, 3])]; tensor var_3619 = const()[name = string("op_3619"), val = tensor([1, 1, -1])]; tensor var_3613 = transpose(perm = var_3612, x = var_3602_cast_fp16)[name = string("transpose_356")]; tensor attn_output_3 = reshape(shape = var_3619, x = var_3613)[name = string("attn_output_3")]; tensor var_3624 = const()[name = string("op_3624"), val = tensor([0, 2, 1])]; tensor squeeze_0_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2250347264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2251920192))))[name = string("squeeze_0_palettized")]; string var_3640_pad_type_0 = const()[name = string("op_3640_pad_type_0"), val = string("valid")]; int32 var_3640_groups_0 = const()[name = string("op_3640_groups_0"), val = int32(1)]; tensor var_3640_strides_0 = const()[name = string("op_3640_strides_0"), val = tensor([1])]; tensor var_3640_pad_0 = const()[name = string("op_3640_pad_0"), val = tensor([0, 0])]; tensor var_3640_dilations_0 = const()[name = string("op_3640_dilations_0"), val = tensor([1])]; tensor var_3625 = transpose(perm = var_3624, x = attn_output_3)[name = string("transpose_355")]; tensor var_3640 = conv(dilations = var_3640_dilations_0, groups = var_3640_groups_0, pad = var_3640_pad_0, pad_type = var_3640_pad_type_0, strides = var_3640_strides_0, weight = squeeze_0_palettized, x = var_3625)[name = string("op_3640")]; tensor var_3644 = const()[name = string("op_3644"), val = tensor([0, 2, 1])]; int32 var_3650 = const()[name = string("op_3650"), val = int32(-1)]; fp16 const_84_promoted_to_fp16 = const()[name = string("const_84_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_153 = transpose(perm = var_3644, x = var_3640)[name = string("transpose_354")]; tensor var_3656_cast_fp16 = mul(x = x_153, y = const_84_promoted_to_fp16)[name = string("op_3656_cast_fp16")]; bool input_87_interleave_0 = const()[name = string("input_87_interleave_0"), val = bool(false)]; tensor input_87_cast_fp16 = concat(axis = var_3650, interleave = input_87_interleave_0, values = (x_153, var_3656_cast_fp16))[name = string("input_87_cast_fp16")]; tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; fp16 var_3648_to_fp16 = const()[name = string("op_3648_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_3648_to_fp16, x = input_87_cast_fp16)[name = string("normed_153_cast_fp16")]; tensor var_3661_split_sizes_0 = const()[name = string("op_3661_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_3661_axis_0 = const()[name = string("op_3661_axis_0"), val = int32(-1)]; tensor var_3661_cast_fp16_0, tensor var_3661_cast_fp16_1 = split(axis = var_3661_axis_0, split_sizes = var_3661_split_sizes_0, x = normed_153_cast_fp16)[name = string("op_3661_cast_fp16")]; tensor const_85_to_fp16 = const()[name = string("const_85_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2251921792)))]; tensor var_3664_cast_fp16 = mul(x = var_3661_cast_fp16_0, y = const_85_to_fp16)[name = string("op_3664_cast_fp16")]; tensor x_157 = add(x = hidden_states_3, y = var_3664_cast_fp16)[name = string("x_157")]; int32 var_3672 = const()[name = string("op_3672"), val = int32(-1)]; fp16 const_86_promoted_to_fp16 = const()[name = string("const_86_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3678_cast_fp16 = mul(x = x_157, y = const_86_promoted_to_fp16)[name = string("op_3678_cast_fp16")]; bool input_89_interleave_0 = const()[name = string("input_89_interleave_0"), val = bool(false)]; tensor input_89_cast_fp16 = concat(axis = var_3672, interleave = input_89_interleave_0, values = (x_157, var_3678_cast_fp16))[name = string("input_89_cast_fp16")]; tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; fp16 var_3670_to_fp16 = const()[name = string("op_3670_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_3670_to_fp16, x = input_89_cast_fp16)[name = string("normed_157_cast_fp16")]; tensor var_3683_split_sizes_0 = const()[name = string("op_3683_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_3683_axis_0 = const()[name = string("op_3683_axis_0"), val = int32(-1)]; tensor var_3683_cast_fp16_0, tensor var_3683_cast_fp16_1 = split(axis = var_3683_axis_0, split_sizes = var_3683_split_sizes_0, x = normed_157_cast_fp16)[name = string("op_3683_cast_fp16")]; tensor const_87_to_fp16 = const()[name = string("const_87_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2251924928)))]; tensor var_3686_cast_fp16 = mul(x = var_3683_cast_fp16_0, y = const_87_to_fp16)[name = string("op_3686_cast_fp16")]; tensor var_3696 = const()[name = string("op_3696"), val = tensor([0, 2, 1])]; tensor input_91_axes_0 = const()[name = string("input_91_axes_0"), val = tensor([2])]; tensor var_3697 = transpose(perm = var_3696, x = var_3686_cast_fp16)[name = string("transpose_353")]; tensor input_91 = expand_dims(axes = input_91_axes_0, x = var_3697)[name = string("input_91")]; string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")]; tensor gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor([1, 1])]; tensor gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor([1, 1])]; int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)]; tensor gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_91)[name = string("gate_1")]; string up_1_pad_type_0 = const()[name = string("up_1_pad_type_0"), val = string("valid")]; tensor up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor([1, 1])]; tensor up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_1_dilations_0 = const()[name = string("up_1_dilations_0"), val = tensor([1, 1])]; int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)]; tensor up_1 = conv(dilations = up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = up_1_strides_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_91)[name = string("up_1")]; string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")]; tensor input_93 = mul(x = gate_3, y = up_1)[name = string("input_93")]; string mlp_out_1_pad_type_0 = const()[name = string("mlp_out_1_pad_type_0"), val = string("valid")]; tensor mlp_out_1_strides_0 = const()[name = string("mlp_out_1_strides_0"), val = tensor([1, 1])]; tensor mlp_out_1_pad_0 = const()[name = string("mlp_out_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_1_dilations_0 = const()[name = string("mlp_out_1_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_1_groups_0 = const()[name = string("mlp_out_1_groups_0"), val = int32(1)]; tensor mlp_out_1 = conv(dilations = mlp_out_1_dilations_0, groups = mlp_out_1_groups_0, pad = mlp_out_1_pad_0, pad_type = mlp_out_1_pad_type_0, strides = mlp_out_1_strides_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_93)[name = string("mlp_out_1")]; tensor var_3737_axes_0 = const()[name = string("op_3737_axes_0"), val = tensor([2])]; tensor var_3737 = squeeze(axes = var_3737_axes_0, x = mlp_out_1)[name = string("op_3737")]; tensor var_3741 = const()[name = string("op_3741"), val = tensor([0, 2, 1])]; int32 var_3747 = const()[name = string("op_3747"), val = int32(-1)]; fp16 const_88_promoted_to_fp16 = const()[name = string("const_88_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_161 = transpose(perm = var_3741, x = var_3737)[name = string("transpose_352")]; tensor var_3753_cast_fp16 = mul(x = x_161, y = const_88_promoted_to_fp16)[name = string("op_3753_cast_fp16")]; bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)]; tensor input_95_cast_fp16 = concat(axis = var_3747, interleave = input_95_interleave_0, values = (x_161, var_3753_cast_fp16))[name = string("input_95_cast_fp16")]; tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; fp16 var_3745_to_fp16 = const()[name = string("op_3745_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_3745_to_fp16, x = input_95_cast_fp16)[name = string("normed_161_cast_fp16")]; tensor var_3758_split_sizes_0 = const()[name = string("op_3758_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_3758_axis_0 = const()[name = string("op_3758_axis_0"), val = int32(-1)]; tensor var_3758_cast_fp16_0, tensor var_3758_cast_fp16_1 = split(axis = var_3758_axis_0, split_sizes = var_3758_split_sizes_0, x = normed_161_cast_fp16)[name = string("op_3758_cast_fp16")]; tensor const_89_to_fp16 = const()[name = string("const_89_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2251928064)))]; tensor var_3761_cast_fp16 = mul(x = var_3758_cast_fp16_0, y = const_89_to_fp16)[name = string("op_3761_cast_fp16")]; tensor hidden_states_13 = add(x = x_157, y = var_3761_cast_fp16)[name = string("hidden_states_13")]; tensor per_layer_slice_1_begin_0 = const()[name = string("per_layer_slice_1_begin_0"), val = tensor([0, 0, 0])]; tensor per_layer_slice_1_end_0 = const()[name = string("per_layer_slice_1_end_0"), val = tensor([1, 1, 256])]; tensor per_layer_slice_1_end_mask_0 = const()[name = string("per_layer_slice_1_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_1 = slice_by_index(begin = per_layer_slice_1_begin_0, end = per_layer_slice_1_end_0, end_mask = per_layer_slice_1_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_1")]; tensor linear_1_bias_0 = const()[name = string("linear_1_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2251931200)))]; tensor gated_1 = linear(bias = linear_1_bias_0, weight = layers_0_per_layer_input_gate_weight_palettized, x = hidden_states_13)[name = string("linear_1")]; string gated_3_mode_0 = const()[name = string("gated_3_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_3 = gelu(mode = gated_3_mode_0, x = gated_1)[name = string("gated_3")]; tensor input_99 = mul(x = gated_3, y = per_layer_slice_1)[name = string("input_99")]; tensor linear_2_bias_0 = const()[name = string("linear_2_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2251931776)))]; tensor x_165 = linear(bias = linear_2_bias_0, weight = layers_0_per_layer_projection_weight_palettized, x = input_99)[name = string("linear_2")]; int32 var_3798 = const()[name = string("op_3798"), val = int32(-1)]; fp16 const_90_promoted_to_fp16 = const()[name = string("const_90_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3804_cast_fp16 = mul(x = x_165, y = const_90_promoted_to_fp16)[name = string("op_3804_cast_fp16")]; bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; tensor input_101_cast_fp16 = concat(axis = var_3798, interleave = input_101_interleave_0, values = (x_165, var_3804_cast_fp16))[name = string("input_101_cast_fp16")]; tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; fp16 var_3796_to_fp16 = const()[name = string("op_3796_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_3796_to_fp16, x = input_101_cast_fp16)[name = string("normed_165_cast_fp16")]; tensor var_3809_split_sizes_0 = const()[name = string("op_3809_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_3809_axis_0 = const()[name = string("op_3809_axis_0"), val = int32(-1)]; tensor var_3809_cast_fp16_0, tensor var_3809_cast_fp16_1 = split(axis = var_3809_axis_0, split_sizes = var_3809_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_3809_cast_fp16")]; tensor const_91_to_fp16 = const()[name = string("const_91_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2251934912)))]; tensor var_3812_cast_fp16 = mul(x = var_3809_cast_fp16_0, y = const_91_to_fp16)[name = string("op_3812_cast_fp16")]; tensor hidden_states_17 = add(x = hidden_states_13, y = var_3812_cast_fp16)[name = string("hidden_states_17")]; tensor layers_0_layer_scalar_to_fp16 = const()[name = string("layers_0_layer_scalar_to_fp16"), val = tensor([0x1.24p-6])]; tensor x_169_cast_fp16 = mul(x = hidden_states_17, y = layers_0_layer_scalar_to_fp16)[name = string("x_169_cast_fp16")]; int32 var_3820 = const()[name = string("op_3820"), val = int32(-1)]; fp16 const_92_promoted_to_fp16 = const()[name = string("const_92_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3826_cast_fp16 = mul(x = x_169_cast_fp16, y = const_92_promoted_to_fp16)[name = string("op_3826_cast_fp16")]; bool input_103_interleave_0 = const()[name = string("input_103_interleave_0"), val = bool(false)]; tensor input_103_cast_fp16 = concat(axis = var_3820, interleave = input_103_interleave_0, values = (x_169_cast_fp16, var_3826_cast_fp16))[name = string("input_103_cast_fp16")]; tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; fp16 var_3818_to_fp16 = const()[name = string("op_3818_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_3818_to_fp16, x = input_103_cast_fp16)[name = string("normed_169_cast_fp16")]; tensor var_3831_split_sizes_0 = const()[name = string("op_3831_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_3831_axis_0 = const()[name = string("op_3831_axis_0"), val = int32(-1)]; tensor var_3831_cast_fp16_0, tensor var_3831_cast_fp16_1 = split(axis = var_3831_axis_0, split_sizes = var_3831_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_3831_cast_fp16")]; tensor const_93_to_fp16 = const()[name = string("const_93_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2251938048)))]; tensor var_3834_cast_fp16 = mul(x = var_3831_cast_fp16_0, y = const_93_to_fp16)[name = string("op_3834_cast_fp16")]; tensor var_3842 = const()[name = string("op_3842"), val = tensor([0, 2, 1])]; tensor var_3845_axes_0 = const()[name = string("op_3845_axes_0"), val = tensor([2])]; tensor var_3843_cast_fp16 = transpose(perm = var_3842, x = var_3834_cast_fp16)[name = string("transpose_351")]; tensor var_3845_cast_fp16 = expand_dims(axes = var_3845_axes_0, x = var_3843_cast_fp16)[name = string("op_3845_cast_fp16")]; string var_3861_pad_type_0 = const()[name = string("op_3861_pad_type_0"), val = string("valid")]; tensor var_3861_strides_0 = const()[name = string("op_3861_strides_0"), val = tensor([1, 1])]; tensor var_3861_pad_0 = const()[name = string("op_3861_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3861_dilations_0 = const()[name = string("op_3861_dilations_0"), val = tensor([1, 1])]; int32 var_3861_groups_0 = const()[name = string("op_3861_groups_0"), val = int32(1)]; tensor var_3861 = conv(dilations = var_3861_dilations_0, groups = var_3861_groups_0, pad = var_3861_pad_0, pad_type = var_3861_pad_type_0, strides = var_3861_strides_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = var_3845_cast_fp16)[name = string("op_3861")]; tensor var_3866 = const()[name = string("op_3866"), val = tensor([1, 8, 256, 1])]; tensor var_3867 = reshape(shape = var_3866, x = var_3861)[name = string("op_3867")]; tensor var_3872 = const()[name = string("op_3872"), val = tensor([0, 1, 3, 2])]; tensor var_3882 = const()[name = string("op_3882"), val = tensor([1, 8, 256])]; tensor var_3873 = transpose(perm = var_3872, x = var_3867)[name = string("transpose_350")]; tensor x_173 = reshape(shape = var_3882, x = var_3873)[name = string("x_173")]; int32 var_3888 = const()[name = string("op_3888"), val = int32(-1)]; fp16 const_94_promoted_to_fp16 = const()[name = string("const_94_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3894_cast_fp16 = mul(x = x_173, y = const_94_promoted_to_fp16)[name = string("op_3894_cast_fp16")]; bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; tensor input_107_cast_fp16 = concat(axis = var_3888, interleave = input_107_interleave_0, values = (x_173, var_3894_cast_fp16))[name = string("input_107_cast_fp16")]; tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; fp16 var_3886_to_fp16 = const()[name = string("op_3886_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_3886_to_fp16, x = input_107_cast_fp16)[name = string("normed_173_cast_fp16")]; tensor var_3899_split_sizes_0 = const()[name = string("op_3899_split_sizes_0"), val = tensor([256, 256])]; int32 var_3899_axis_0 = const()[name = string("op_3899_axis_0"), val = int32(-1)]; tensor var_3899_cast_fp16_0, tensor var_3899_cast_fp16_1 = split(axis = var_3899_axis_0, split_sizes = var_3899_split_sizes_0, x = normed_173_cast_fp16)[name = string("op_3899_cast_fp16")]; tensor const_95_to_fp16 = const()[name = string("const_95_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2251941184)))]; tensor var_3902_cast_fp16 = mul(x = var_3899_cast_fp16_0, y = const_95_to_fp16)[name = string("op_3902_cast_fp16")]; tensor var_3908 = const()[name = string("op_3908"), val = tensor([1, 8, 1, 256])]; tensor q_11 = reshape(shape = var_3908, x = var_3902_cast_fp16)[name = string("q_11")]; tensor var_3910 = mul(x = q_11, y = cos_1)[name = string("op_3910")]; tensor var_3911_split_sizes_0 = const()[name = string("op_3911_split_sizes_0"), val = tensor([128, 128])]; int32 var_3911_axis_0 = const()[name = string("op_3911_axis_0"), val = int32(-1)]; tensor var_3911_0, tensor var_3911_1 = split(axis = var_3911_axis_0, split_sizes = var_3911_split_sizes_0, x = q_11)[name = string("op_3911")]; fp16 const_96_promoted = const()[name = string("const_96_promoted"), val = fp16(-0x1p+0)]; tensor var_3913 = mul(x = var_3911_1, y = const_96_promoted)[name = string("op_3913")]; int32 var_3915 = const()[name = string("op_3915"), val = int32(-1)]; bool var_3916_interleave_0 = const()[name = string("op_3916_interleave_0"), val = bool(false)]; tensor var_3916 = concat(axis = var_3915, interleave = var_3916_interleave_0, values = (var_3913, var_3911_0))[name = string("op_3916")]; tensor var_3917 = mul(x = var_3916, y = sin_1)[name = string("op_3917")]; tensor q_15 = add(x = var_3910, y = var_3917)[name = string("q_15")]; string var_3930_pad_type_0 = const()[name = string("op_3930_pad_type_0"), val = string("valid")]; tensor var_3930_strides_0 = const()[name = string("op_3930_strides_0"), val = tensor([1, 1])]; tensor var_3930_pad_0 = const()[name = string("op_3930_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3930_dilations_0 = const()[name = string("op_3930_dilations_0"), val = tensor([1, 1])]; int32 var_3930_groups_0 = const()[name = string("op_3930_groups_0"), val = int32(1)]; tensor var_3930 = conv(dilations = var_3930_dilations_0, groups = var_3930_groups_0, pad = var_3930_pad_0, pad_type = var_3930_pad_type_0, strides = var_3930_strides_0, weight = layers_1_self_attn_k_proj_weight_palettized, x = var_3845_cast_fp16)[name = string("op_3930")]; tensor var_3935 = const()[name = string("op_3935"), val = tensor([1, 1, 256, 1])]; tensor var_3936 = reshape(shape = var_3935, x = var_3930)[name = string("op_3936")]; tensor var_3941 = const()[name = string("op_3941"), val = tensor([0, 1, 3, 2])]; string var_3958_pad_type_0 = const()[name = string("op_3958_pad_type_0"), val = string("valid")]; tensor var_3958_strides_0 = const()[name = string("op_3958_strides_0"), val = tensor([1, 1])]; tensor var_3958_pad_0 = const()[name = string("op_3958_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3958_dilations_0 = const()[name = string("op_3958_dilations_0"), val = tensor([1, 1])]; int32 var_3958_groups_0 = const()[name = string("op_3958_groups_0"), val = int32(1)]; tensor var_3958 = conv(dilations = var_3958_dilations_0, groups = var_3958_groups_0, pad = var_3958_pad_0, pad_type = var_3958_pad_type_0, strides = var_3958_strides_0, weight = layers_1_self_attn_v_proj_weight_palettized, x = var_3845_cast_fp16)[name = string("op_3958")]; tensor var_3963 = const()[name = string("op_3963"), val = tensor([1, 1, 256, 1])]; tensor var_3964 = reshape(shape = var_3963, x = var_3958)[name = string("op_3964")]; tensor var_3969 = const()[name = string("op_3969"), val = tensor([0, 1, 3, 2])]; tensor var_3979 = const()[name = string("op_3979"), val = tensor([1, 1, 256])]; tensor var_3942 = transpose(perm = var_3941, x = var_3936)[name = string("transpose_349")]; tensor x_177 = reshape(shape = var_3979, x = var_3942)[name = string("x_177")]; int32 var_3985 = const()[name = string("op_3985"), val = int32(-1)]; fp16 const_97_promoted_to_fp16 = const()[name = string("const_97_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3991_cast_fp16 = mul(x = x_177, y = const_97_promoted_to_fp16)[name = string("op_3991_cast_fp16")]; bool input_109_interleave_0 = const()[name = string("input_109_interleave_0"), val = bool(false)]; tensor input_109_cast_fp16 = concat(axis = var_3985, interleave = input_109_interleave_0, values = (x_177, var_3991_cast_fp16))[name = string("input_109_cast_fp16")]; tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; fp16 var_3983_to_fp16 = const()[name = string("op_3983_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_3983_to_fp16, x = input_109_cast_fp16)[name = string("normed_177_cast_fp16")]; tensor var_3996_split_sizes_0 = const()[name = string("op_3996_split_sizes_0"), val = tensor([256, 256])]; int32 var_3996_axis_0 = const()[name = string("op_3996_axis_0"), val = int32(-1)]; tensor var_3996_cast_fp16_0, tensor var_3996_cast_fp16_1 = split(axis = var_3996_axis_0, split_sizes = var_3996_split_sizes_0, x = normed_177_cast_fp16)[name = string("op_3996_cast_fp16")]; tensor const_98_to_fp16 = const()[name = string("const_98_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2251941760)))]; tensor var_3999_cast_fp16 = mul(x = var_3996_cast_fp16_0, y = const_98_to_fp16)[name = string("op_3999_cast_fp16")]; tensor var_4005 = const()[name = string("op_4005"), val = tensor([1, 1, 1, 256])]; tensor q_13 = reshape(shape = var_4005, x = var_3999_cast_fp16)[name = string("q_13")]; fp16 var_4012_promoted_to_fp16 = const()[name = string("op_4012_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3970 = transpose(perm = var_3969, x = var_3964)[name = string("transpose_348")]; tensor var_4013_cast_fp16 = pow(x = var_3970, y = var_4012_promoted_to_fp16)[name = string("op_4013_cast_fp16")]; tensor var_4018_axes_0 = const()[name = string("op_4018_axes_0"), val = tensor([-1])]; bool var_4018_keep_dims_0 = const()[name = string("op_4018_keep_dims_0"), val = bool(true)]; tensor var_4018_cast_fp16 = reduce_mean(axes = var_4018_axes_0, keep_dims = var_4018_keep_dims_0, x = var_4013_cast_fp16)[name = string("op_4018_cast_fp16")]; fp16 var_4020_to_fp16 = const()[name = string("op_4020_to_fp16"), val = fp16(0x1.1p-20)]; tensor mean_sq_3_cast_fp16 = add(x = var_4018_cast_fp16, y = var_4020_to_fp16)[name = string("mean_sq_3_cast_fp16")]; fp16 var_4027_to_fp16 = const()[name = string("op_4027_to_fp16"), val = fp16(-0x1p-1)]; tensor var_4028_cast_fp16 = pow(x = mean_sq_3_cast_fp16, y = var_4027_to_fp16)[name = string("op_4028_cast_fp16")]; tensor var_4029_cast_fp16 = mul(x = var_3970, y = var_4028_cast_fp16)[name = string("op_4029_cast_fp16")]; tensor var_4035 = mul(x = q_13, y = cos_1)[name = string("op_4035")]; tensor var_4036_split_sizes_0 = const()[name = string("op_4036_split_sizes_0"), val = tensor([128, 128])]; int32 var_4036_axis_0 = const()[name = string("op_4036_axis_0"), val = int32(-1)]; tensor var_4036_0, tensor var_4036_1 = split(axis = var_4036_axis_0, split_sizes = var_4036_split_sizes_0, x = q_13)[name = string("op_4036")]; fp16 const_99_promoted = const()[name = string("const_99_promoted"), val = fp16(-0x1p+0)]; tensor var_4038 = mul(x = var_4036_1, y = const_99_promoted)[name = string("op_4038")]; int32 var_4040 = const()[name = string("op_4040"), val = int32(-1)]; bool var_4041_interleave_0 = const()[name = string("op_4041_interleave_0"), val = bool(false)]; tensor var_4041 = concat(axis = var_4040, interleave = var_4041_interleave_0, values = (var_4038, var_4036_0))[name = string("op_4041")]; tensor var_4042 = mul(x = var_4041, y = sin_1)[name = string("op_4042")]; tensor input_111 = add(x = var_4035, y = var_4042)[name = string("input_111")]; tensor var_4047_begin_0 = const()[name = string("op_4047_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_4047_end_0 = const()[name = string("op_4047_end_0"), val = tensor([2, 1, 512, 512])]; tensor var_4047_end_mask_0 = const()[name = string("op_4047_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4047_squeeze_mask_0 = const()[name = string("op_4047_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_4047_cast_fp16 = slice_by_index(begin = var_4047_begin_0, end = var_4047_end_0, end_mask = var_4047_end_mask_0, squeeze_mask = var_4047_squeeze_mask_0, x = coreml_update_state_31)[name = string("op_4047_cast_fp16")]; tensor K_cache_3_axes_0 = const()[name = string("K_cache_3_axes_0"), val = tensor([0])]; tensor K_cache_3_cast_fp16 = expand_dims(axes = K_cache_3_axes_0, x = var_4047_cast_fp16)[name = string("K_cache_3_cast_fp16")]; tensor var_4052_begin_0 = const()[name = string("op_4052_begin_0"), val = tensor([36, 0, 0, 0])]; tensor var_4052_end_0 = const()[name = string("op_4052_end_0"), val = tensor([37, 1, 512, 512])]; tensor var_4052_end_mask_0 = const()[name = string("op_4052_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4052_squeeze_mask_0 = const()[name = string("op_4052_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_4052_cast_fp16 = slice_by_index(begin = var_4052_begin_0, end = var_4052_end_0, end_mask = var_4052_end_mask_0, squeeze_mask = var_4052_squeeze_mask_0, x = coreml_update_state_31)[name = string("op_4052_cast_fp16")]; tensor V_cache_3_axes_0 = const()[name = string("V_cache_3_axes_0"), val = tensor([0])]; tensor V_cache_3_cast_fp16 = expand_dims(axes = V_cache_3_axes_0, x = var_4052_cast_fp16)[name = string("V_cache_3_cast_fp16")]; tensor k_padded_3_pad_0 = const()[name = string("k_padded_3_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string k_padded_3_mode_0 = const()[name = string("k_padded_3_mode_0"), val = string("constant")]; fp16 const_100_to_fp16 = const()[name = string("const_100_to_fp16"), val = fp16(0x0p+0)]; tensor k_padded_3_cast_fp16 = pad(constant_val = const_100_to_fp16, mode = k_padded_3_mode_0, pad = k_padded_3_pad_0, x = input_111)[name = string("k_padded_3_cast_fp16")]; tensor v_padded_3_pad_0 = const()[name = string("v_padded_3_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string v_padded_3_mode_0 = const()[name = string("v_padded_3_mode_0"), val = string("constant")]; fp16 const_101_to_fp16 = const()[name = string("const_101_to_fp16"), val = fp16(0x0p+0)]; tensor v_padded_3_cast_fp16 = pad(constant_val = const_101_to_fp16, mode = v_padded_3_mode_0, pad = v_padded_3_pad_0, x = var_4029_cast_fp16)[name = string("v_padded_3_cast_fp16")]; tensor var_4070_cast_fp16 = mul(x = K_cache_3_cast_fp16, y = var_3515_cast_fp16)[name = string("op_4070_cast_fp16")]; tensor var_4071_reps_0 = const()[name = string("op_4071_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_4071_cast_fp16 = tile(reps = var_4071_reps_0, x = k_padded_3_cast_fp16)[name = string("op_4071_cast_fp16")]; tensor var_4072_cast_fp16 = mul(x = var_4071_cast_fp16, y = update_mask)[name = string("op_4072_cast_fp16")]; tensor K_new_3_cast_fp16 = add(x = var_4070_cast_fp16, y = var_4072_cast_fp16)[name = string("K_new_3_cast_fp16")]; tensor var_4078_cast_fp16 = mul(x = V_cache_3_cast_fp16, y = var_3515_cast_fp16)[name = string("op_4078_cast_fp16")]; tensor var_4079_reps_0 = const()[name = string("op_4079_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_4079_cast_fp16 = tile(reps = var_4079_reps_0, x = v_padded_3_cast_fp16)[name = string("op_4079_cast_fp16")]; tensor var_4080_cast_fp16 = mul(x = var_4079_cast_fp16, y = update_mask)[name = string("op_4080_cast_fp16")]; tensor V_new_3_cast_fp16 = add(x = var_4078_cast_fp16, y = var_4080_cast_fp16)[name = string("V_new_3_cast_fp16")]; tensor var_4084_axes_0 = const()[name = string("op_4084_axes_0"), val = tensor([0])]; tensor var_4084_cast_fp16 = squeeze(axes = var_4084_axes_0, x = K_new_3_cast_fp16)[name = string("op_4084_cast_fp16")]; tensor concat_8 = const()[name = string("concat_8"), val = tensor([1, 0, 0, 0])]; tensor concat_9 = const()[name = string("concat_9"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_8, begin_mask = kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_9, end_mask = kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_3_stride_0, update = var_4084_cast_fp16, x = coreml_update_state_31)[name = string("kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_3_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_32_write_state")]; tensor coreml_update_state_32 = read_state(input = kv_cache_0)[name = string("coreml_update_state_32")]; tensor var_4091_axes_0 = const()[name = string("op_4091_axes_0"), val = tensor([0])]; tensor var_4091_cast_fp16 = squeeze(axes = var_4091_axes_0, x = V_new_3_cast_fp16)[name = string("op_4091_cast_fp16")]; tensor concat_10 = const()[name = string("concat_10"), val = tensor([36, 0, 0, 0])]; tensor concat_11 = const()[name = string("concat_11"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_10, begin_mask = kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_11, end_mask = kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_4_stride_0, update = var_4091_cast_fp16, x = coreml_update_state_32)[name = string("kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_4_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_33_write_state")]; tensor coreml_update_state_33 = read_state(input = kv_cache_0)[name = string("coreml_update_state_33")]; tensor K_for_attn_3_begin_0 = const()[name = string("K_for_attn_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor K_for_attn_3_end_0 = const()[name = string("K_for_attn_3_end_0"), val = tensor([1, 1, 512, 256])]; tensor K_for_attn_3_end_mask_0 = const()[name = string("K_for_attn_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor K_for_attn_3_cast_fp16 = slice_by_index(begin = K_for_attn_3_begin_0, end = K_for_attn_3_end_0, end_mask = K_for_attn_3_end_mask_0, x = K_new_3_cast_fp16)[name = string("K_for_attn_3_cast_fp16")]; tensor V_for_attn_3_begin_0 = const()[name = string("V_for_attn_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor V_for_attn_3_end_0 = const()[name = string("V_for_attn_3_end_0"), val = tensor([1, 1, 512, 256])]; tensor V_for_attn_3_end_mask_0 = const()[name = string("V_for_attn_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor V_for_attn_3_cast_fp16 = slice_by_index(begin = V_for_attn_3_begin_0, end = V_for_attn_3_end_0, end_mask = V_for_attn_3_end_mask_0, x = V_new_3_cast_fp16)[name = string("V_for_attn_3_cast_fp16")]; tensor transpose_4_perm_0 = const()[name = string("transpose_4_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_2_reps_0 = const()[name = string("tile_2_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_4_cast_fp16 = transpose(perm = transpose_4_perm_0, x = K_for_attn_3_cast_fp16)[name = string("transpose_347")]; tensor tile_2_cast_fp16 = tile(reps = tile_2_reps_0, x = transpose_4_cast_fp16)[name = string("tile_2_cast_fp16")]; tensor concat_12 = const()[name = string("concat_12"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_4_cast_fp16 = reshape(shape = concat_12, x = tile_2_cast_fp16)[name = string("reshape_4_cast_fp16")]; tensor transpose_5_perm_0 = const()[name = string("transpose_5_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_13 = const()[name = string("concat_13"), val = tensor([-1, 1, 512, 256])]; tensor transpose_5_cast_fp16 = transpose(perm = transpose_5_perm_0, x = reshape_4_cast_fp16)[name = string("transpose_346")]; tensor reshape_5_cast_fp16 = reshape(shape = concat_13, x = transpose_5_cast_fp16)[name = string("reshape_5_cast_fp16")]; tensor transpose_141_perm_0 = const()[name = string("transpose_141_perm_0"), val = tensor([1, 0, -1, -2])]; tensor transpose_6_perm_0 = const()[name = string("transpose_6_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_3_reps_0 = const()[name = string("tile_3_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_6_cast_fp16 = transpose(perm = transpose_6_perm_0, x = V_for_attn_3_cast_fp16)[name = string("transpose_345")]; tensor tile_3_cast_fp16 = tile(reps = tile_3_reps_0, x = transpose_6_cast_fp16)[name = string("tile_3_cast_fp16")]; tensor concat_14 = const()[name = string("concat_14"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_6_cast_fp16 = reshape(shape = concat_14, x = tile_3_cast_fp16)[name = string("reshape_6_cast_fp16")]; tensor transpose_7_perm_0 = const()[name = string("transpose_7_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_15 = const()[name = string("concat_15"), val = tensor([-1, 1, 512, 256])]; tensor transpose_7_cast_fp16 = transpose(perm = transpose_7_perm_0, x = reshape_6_cast_fp16)[name = string("transpose_344")]; tensor reshape_7_cast_fp16 = reshape(shape = concat_15, x = transpose_7_cast_fp16)[name = string("reshape_7_cast_fp16")]; tensor V_expanded_3_perm_0 = const()[name = string("V_expanded_3_perm_0"), val = tensor([1, 0, -2, -1])]; bool var_4128_transpose_x_0 = const()[name = string("op_4128_transpose_x_0"), val = bool(false)]; bool var_4128_transpose_y_0 = const()[name = string("op_4128_transpose_y_0"), val = bool(false)]; tensor transpose_141_cast_fp16 = transpose(perm = transpose_141_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_343")]; tensor var_4128_cast_fp16 = matmul(transpose_x = var_4128_transpose_x_0, transpose_y = var_4128_transpose_y_0, x = q_15, y = transpose_141_cast_fp16)[name = string("op_4128_cast_fp16")]; tensor attn_weights_9_cast_fp16 = add(x = var_4128_cast_fp16, y = causal_mask)[name = string("attn_weights_9_cast_fp16")]; int32 var_4138 = const()[name = string("op_4138"), val = int32(-1)]; tensor var_4140_cast_fp16 = softmax(axis = var_4138, x = attn_weights_9_cast_fp16)[name = string("op_4140_cast_fp16")]; bool var_4156_transpose_x_0 = const()[name = string("op_4156_transpose_x_0"), val = bool(false)]; bool var_4156_transpose_y_0 = const()[name = string("op_4156_transpose_y_0"), val = bool(false)]; tensor V_expanded_3_cast_fp16 = transpose(perm = V_expanded_3_perm_0, x = reshape_7_cast_fp16)[name = string("transpose_342")]; tensor var_4156_cast_fp16 = matmul(transpose_x = var_4156_transpose_x_0, transpose_y = var_4156_transpose_y_0, x = var_4140_cast_fp16, y = V_expanded_3_cast_fp16)[name = string("op_4156_cast_fp16")]; tensor var_4166 = const()[name = string("op_4166"), val = tensor([0, 2, 1, 3])]; tensor var_4173 = const()[name = string("op_4173"), val = tensor([1, 1, -1])]; tensor var_4167 = transpose(perm = var_4166, x = var_4156_cast_fp16)[name = string("transpose_341")]; tensor attn_output_9 = reshape(shape = var_4173, x = var_4167)[name = string("attn_output_9")]; tensor var_4178 = const()[name = string("op_4178"), val = tensor([0, 2, 1])]; tensor squeeze_1_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2251942336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2253515264))))[name = string("squeeze_1_palettized")]; string var_4194_pad_type_0 = const()[name = string("op_4194_pad_type_0"), val = string("valid")]; int32 var_4194_groups_0 = const()[name = string("op_4194_groups_0"), val = int32(1)]; tensor var_4194_strides_0 = const()[name = string("op_4194_strides_0"), val = tensor([1])]; tensor var_4194_pad_0 = const()[name = string("op_4194_pad_0"), val = tensor([0, 0])]; tensor var_4194_dilations_0 = const()[name = string("op_4194_dilations_0"), val = tensor([1])]; tensor var_4179 = transpose(perm = var_4178, x = attn_output_9)[name = string("transpose_340")]; tensor var_4194 = conv(dilations = var_4194_dilations_0, groups = var_4194_groups_0, pad = var_4194_pad_0, pad_type = var_4194_pad_type_0, strides = var_4194_strides_0, weight = squeeze_1_palettized, x = var_4179)[name = string("op_4194")]; tensor var_4198 = const()[name = string("op_4198"), val = tensor([0, 2, 1])]; int32 var_4204 = const()[name = string("op_4204"), val = int32(-1)]; fp16 const_102_promoted_to_fp16 = const()[name = string("const_102_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_183 = transpose(perm = var_4198, x = var_4194)[name = string("transpose_339")]; tensor var_4210_cast_fp16 = mul(x = x_183, y = const_102_promoted_to_fp16)[name = string("op_4210_cast_fp16")]; bool input_117_interleave_0 = const()[name = string("input_117_interleave_0"), val = bool(false)]; tensor input_117_cast_fp16 = concat(axis = var_4204, interleave = input_117_interleave_0, values = (x_183, var_4210_cast_fp16))[name = string("input_117_cast_fp16")]; tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; fp16 var_4202_to_fp16 = const()[name = string("op_4202_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_4202_to_fp16, x = input_117_cast_fp16)[name = string("normed_181_cast_fp16")]; tensor var_4215_split_sizes_0 = const()[name = string("op_4215_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_4215_axis_0 = const()[name = string("op_4215_axis_0"), val = int32(-1)]; tensor var_4215_cast_fp16_0, tensor var_4215_cast_fp16_1 = split(axis = var_4215_axis_0, split_sizes = var_4215_split_sizes_0, x = normed_181_cast_fp16)[name = string("op_4215_cast_fp16")]; tensor const_103_to_fp16 = const()[name = string("const_103_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2253516864)))]; tensor var_4218_cast_fp16 = mul(x = var_4215_cast_fp16_0, y = const_103_to_fp16)[name = string("op_4218_cast_fp16")]; tensor x_187_cast_fp16 = add(x = x_169_cast_fp16, y = var_4218_cast_fp16)[name = string("x_187_cast_fp16")]; int32 var_4225 = const()[name = string("op_4225"), val = int32(-1)]; fp16 const_104_promoted_to_fp16 = const()[name = string("const_104_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4231_cast_fp16 = mul(x = x_187_cast_fp16, y = const_104_promoted_to_fp16)[name = string("op_4231_cast_fp16")]; bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; tensor input_119_cast_fp16 = concat(axis = var_4225, interleave = input_119_interleave_0, values = (x_187_cast_fp16, var_4231_cast_fp16))[name = string("input_119_cast_fp16")]; tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; fp16 var_4223_to_fp16 = const()[name = string("op_4223_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_4223_to_fp16, x = input_119_cast_fp16)[name = string("normed_185_cast_fp16")]; tensor var_4236_split_sizes_0 = const()[name = string("op_4236_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_4236_axis_0 = const()[name = string("op_4236_axis_0"), val = int32(-1)]; tensor var_4236_cast_fp16_0, tensor var_4236_cast_fp16_1 = split(axis = var_4236_axis_0, split_sizes = var_4236_split_sizes_0, x = normed_185_cast_fp16)[name = string("op_4236_cast_fp16")]; tensor const_105_to_fp16 = const()[name = string("const_105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2253520000)))]; tensor var_4239_cast_fp16 = mul(x = var_4236_cast_fp16_0, y = const_105_to_fp16)[name = string("op_4239_cast_fp16")]; tensor var_4252 = const()[name = string("op_4252"), val = tensor([0, 2, 1])]; tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; tensor var_4253 = transpose(perm = var_4252, x = var_4239_cast_fp16)[name = string("transpose_338")]; tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_4253)[name = string("input_121")]; string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")]; tensor gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor([1, 1])]; tensor gate_5_pad_0 = const()[name = string("gate_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_5_dilations_0 = const()[name = string("gate_5_dilations_0"), val = tensor([1, 1])]; int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)]; tensor gate_5 = conv(dilations = gate_5_dilations_0, groups = gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_121)[name = string("gate_5")]; string up_3_pad_type_0 = const()[name = string("up_3_pad_type_0"), val = string("valid")]; tensor up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor([1, 1])]; tensor up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor([1, 1])]; int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)]; tensor up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_121)[name = string("up_3")]; string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")]; tensor input_123 = mul(x = gate_7, y = up_3)[name = string("input_123")]; string mlp_out_3_pad_type_0 = const()[name = string("mlp_out_3_pad_type_0"), val = string("valid")]; tensor mlp_out_3_strides_0 = const()[name = string("mlp_out_3_strides_0"), val = tensor([1, 1])]; tensor mlp_out_3_pad_0 = const()[name = string("mlp_out_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_3_dilations_0 = const()[name = string("mlp_out_3_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_3_groups_0 = const()[name = string("mlp_out_3_groups_0"), val = int32(1)]; tensor mlp_out_3 = conv(dilations = mlp_out_3_dilations_0, groups = mlp_out_3_groups_0, pad = mlp_out_3_pad_0, pad_type = mlp_out_3_pad_type_0, strides = mlp_out_3_strides_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_123)[name = string("mlp_out_3")]; tensor var_4293_axes_0 = const()[name = string("op_4293_axes_0"), val = tensor([2])]; tensor var_4293 = squeeze(axes = var_4293_axes_0, x = mlp_out_3)[name = string("op_4293")]; tensor var_4297 = const()[name = string("op_4297"), val = tensor([0, 2, 1])]; int32 var_4303 = const()[name = string("op_4303"), val = int32(-1)]; fp16 const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_191 = transpose(perm = var_4297, x = var_4293)[name = string("transpose_337")]; tensor var_4309_cast_fp16 = mul(x = x_191, y = const_106_promoted_to_fp16)[name = string("op_4309_cast_fp16")]; bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; tensor input_125_cast_fp16 = concat(axis = var_4303, interleave = input_125_interleave_0, values = (x_191, var_4309_cast_fp16))[name = string("input_125_cast_fp16")]; tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; fp16 var_4301_to_fp16 = const()[name = string("op_4301_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_4301_to_fp16, x = input_125_cast_fp16)[name = string("normed_189_cast_fp16")]; tensor var_4314_split_sizes_0 = const()[name = string("op_4314_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_4314_axis_0 = const()[name = string("op_4314_axis_0"), val = int32(-1)]; tensor var_4314_cast_fp16_0, tensor var_4314_cast_fp16_1 = split(axis = var_4314_axis_0, split_sizes = var_4314_split_sizes_0, x = normed_189_cast_fp16)[name = string("op_4314_cast_fp16")]; tensor const_107_to_fp16 = const()[name = string("const_107_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2253523136)))]; tensor var_4317_cast_fp16 = mul(x = var_4314_cast_fp16_0, y = const_107_to_fp16)[name = string("op_4317_cast_fp16")]; tensor hidden_states_25_cast_fp16 = add(x = x_187_cast_fp16, y = var_4317_cast_fp16)[name = string("hidden_states_25_cast_fp16")]; tensor per_layer_slice_3_begin_0 = const()[name = string("per_layer_slice_3_begin_0"), val = tensor([0, 0, 256])]; tensor per_layer_slice_3_end_0 = const()[name = string("per_layer_slice_3_end_0"), val = tensor([1, 1, 512])]; tensor per_layer_slice_3_end_mask_0 = const()[name = string("per_layer_slice_3_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_3 = slice_by_index(begin = per_layer_slice_3_begin_0, end = per_layer_slice_3_end_0, end_mask = per_layer_slice_3_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_3")]; tensor gated_5 = linear(bias = linear_1_bias_0, weight = layers_1_per_layer_input_gate_weight_palettized, x = hidden_states_25_cast_fp16)[name = string("linear_3")]; string gated_7_mode_0 = const()[name = string("gated_7_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_7 = gelu(mode = gated_7_mode_0, x = gated_5)[name = string("gated_7")]; tensor input_129 = mul(x = gated_7, y = per_layer_slice_3)[name = string("input_129")]; tensor x_195 = linear(bias = linear_2_bias_0, weight = layers_1_per_layer_projection_weight_palettized, x = input_129)[name = string("linear_4")]; int32 var_4354 = const()[name = string("op_4354"), val = int32(-1)]; fp16 const_108_promoted_to_fp16 = const()[name = string("const_108_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4360_cast_fp16 = mul(x = x_195, y = const_108_promoted_to_fp16)[name = string("op_4360_cast_fp16")]; bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; tensor input_131_cast_fp16 = concat(axis = var_4354, interleave = input_131_interleave_0, values = (x_195, var_4360_cast_fp16))[name = string("input_131_cast_fp16")]; tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; fp16 var_4352_to_fp16 = const()[name = string("op_4352_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_4352_to_fp16, x = input_131_cast_fp16)[name = string("normed_193_cast_fp16")]; tensor var_4365_split_sizes_0 = const()[name = string("op_4365_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_4365_axis_0 = const()[name = string("op_4365_axis_0"), val = int32(-1)]; tensor var_4365_cast_fp16_0, tensor var_4365_cast_fp16_1 = split(axis = var_4365_axis_0, split_sizes = var_4365_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_4365_cast_fp16")]; tensor const_109_to_fp16 = const()[name = string("const_109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2253526272)))]; tensor var_4368_cast_fp16 = mul(x = var_4365_cast_fp16_0, y = const_109_to_fp16)[name = string("op_4368_cast_fp16")]; tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = var_4368_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor layers_1_layer_scalar_to_fp16 = const()[name = string("layers_1_layer_scalar_to_fp16"), val = tensor([0x1.c8p-3])]; tensor x_199_cast_fp16 = mul(x = hidden_states_29_cast_fp16, y = layers_1_layer_scalar_to_fp16)[name = string("x_199_cast_fp16")]; int32 var_4376 = const()[name = string("op_4376"), val = int32(-1)]; fp16 const_110_promoted_to_fp16 = const()[name = string("const_110_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4382_cast_fp16 = mul(x = x_199_cast_fp16, y = const_110_promoted_to_fp16)[name = string("op_4382_cast_fp16")]; bool input_133_interleave_0 = const()[name = string("input_133_interleave_0"), val = bool(false)]; tensor input_133_cast_fp16 = concat(axis = var_4376, interleave = input_133_interleave_0, values = (x_199_cast_fp16, var_4382_cast_fp16))[name = string("input_133_cast_fp16")]; tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; fp16 var_4374_to_fp16 = const()[name = string("op_4374_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_4374_to_fp16, x = input_133_cast_fp16)[name = string("normed_197_cast_fp16")]; tensor var_4387_split_sizes_0 = const()[name = string("op_4387_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_4387_axis_0 = const()[name = string("op_4387_axis_0"), val = int32(-1)]; tensor var_4387_cast_fp16_0, tensor var_4387_cast_fp16_1 = split(axis = var_4387_axis_0, split_sizes = var_4387_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_4387_cast_fp16")]; tensor const_111_to_fp16 = const()[name = string("const_111_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2253529408)))]; tensor var_4390_cast_fp16 = mul(x = var_4387_cast_fp16_0, y = const_111_to_fp16)[name = string("op_4390_cast_fp16")]; tensor var_4398 = const()[name = string("op_4398"), val = tensor([0, 2, 1])]; tensor var_4401_axes_0 = const()[name = string("op_4401_axes_0"), val = tensor([2])]; tensor var_4399_cast_fp16 = transpose(perm = var_4398, x = var_4390_cast_fp16)[name = string("transpose_336")]; tensor var_4401_cast_fp16 = expand_dims(axes = var_4401_axes_0, x = var_4399_cast_fp16)[name = string("op_4401_cast_fp16")]; string var_4417_pad_type_0 = const()[name = string("op_4417_pad_type_0"), val = string("valid")]; tensor var_4417_strides_0 = const()[name = string("op_4417_strides_0"), val = tensor([1, 1])]; tensor var_4417_pad_0 = const()[name = string("op_4417_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4417_dilations_0 = const()[name = string("op_4417_dilations_0"), val = tensor([1, 1])]; int32 var_4417_groups_0 = const()[name = string("op_4417_groups_0"), val = int32(1)]; tensor var_4417 = conv(dilations = var_4417_dilations_0, groups = var_4417_groups_0, pad = var_4417_pad_0, pad_type = var_4417_pad_type_0, strides = var_4417_strides_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = var_4401_cast_fp16)[name = string("op_4417")]; tensor var_4422 = const()[name = string("op_4422"), val = tensor([1, 8, 256, 1])]; tensor var_4423 = reshape(shape = var_4422, x = var_4417)[name = string("op_4423")]; tensor var_4428 = const()[name = string("op_4428"), val = tensor([0, 1, 3, 2])]; tensor var_4438 = const()[name = string("op_4438"), val = tensor([1, 8, 256])]; tensor var_4429 = transpose(perm = var_4428, x = var_4423)[name = string("transpose_335")]; tensor x_203 = reshape(shape = var_4438, x = var_4429)[name = string("x_203")]; int32 var_4444 = const()[name = string("op_4444"), val = int32(-1)]; fp16 const_112_promoted_to_fp16 = const()[name = string("const_112_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4450_cast_fp16 = mul(x = x_203, y = const_112_promoted_to_fp16)[name = string("op_4450_cast_fp16")]; bool input_137_interleave_0 = const()[name = string("input_137_interleave_0"), val = bool(false)]; tensor input_137_cast_fp16 = concat(axis = var_4444, interleave = input_137_interleave_0, values = (x_203, var_4450_cast_fp16))[name = string("input_137_cast_fp16")]; tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; fp16 var_4442_to_fp16 = const()[name = string("op_4442_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_4442_to_fp16, x = input_137_cast_fp16)[name = string("normed_201_cast_fp16")]; tensor var_4455_split_sizes_0 = const()[name = string("op_4455_split_sizes_0"), val = tensor([256, 256])]; int32 var_4455_axis_0 = const()[name = string("op_4455_axis_0"), val = int32(-1)]; tensor var_4455_cast_fp16_0, tensor var_4455_cast_fp16_1 = split(axis = var_4455_axis_0, split_sizes = var_4455_split_sizes_0, x = normed_201_cast_fp16)[name = string("op_4455_cast_fp16")]; tensor const_113_to_fp16 = const()[name = string("const_113_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2253532544)))]; tensor var_4458_cast_fp16 = mul(x = var_4455_cast_fp16_0, y = const_113_to_fp16)[name = string("op_4458_cast_fp16")]; tensor var_4464 = const()[name = string("op_4464"), val = tensor([1, 8, 1, 256])]; tensor q_19 = reshape(shape = var_4464, x = var_4458_cast_fp16)[name = string("q_19")]; tensor var_4466 = mul(x = q_19, y = cos_1)[name = string("op_4466")]; tensor var_4467_split_sizes_0 = const()[name = string("op_4467_split_sizes_0"), val = tensor([128, 128])]; int32 var_4467_axis_0 = const()[name = string("op_4467_axis_0"), val = int32(-1)]; tensor var_4467_0, tensor var_4467_1 = split(axis = var_4467_axis_0, split_sizes = var_4467_split_sizes_0, x = q_19)[name = string("op_4467")]; fp16 const_114_promoted = const()[name = string("const_114_promoted"), val = fp16(-0x1p+0)]; tensor var_4469 = mul(x = var_4467_1, y = const_114_promoted)[name = string("op_4469")]; int32 var_4471 = const()[name = string("op_4471"), val = int32(-1)]; bool var_4472_interleave_0 = const()[name = string("op_4472_interleave_0"), val = bool(false)]; tensor var_4472 = concat(axis = var_4471, interleave = var_4472_interleave_0, values = (var_4469, var_4467_0))[name = string("op_4472")]; tensor var_4473 = mul(x = var_4472, y = sin_1)[name = string("op_4473")]; tensor q_23 = add(x = var_4466, y = var_4473)[name = string("q_23")]; string var_4486_pad_type_0 = const()[name = string("op_4486_pad_type_0"), val = string("valid")]; tensor var_4486_strides_0 = const()[name = string("op_4486_strides_0"), val = tensor([1, 1])]; tensor var_4486_pad_0 = const()[name = string("op_4486_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4486_dilations_0 = const()[name = string("op_4486_dilations_0"), val = tensor([1, 1])]; int32 var_4486_groups_0 = const()[name = string("op_4486_groups_0"), val = int32(1)]; tensor var_4486 = conv(dilations = var_4486_dilations_0, groups = var_4486_groups_0, pad = var_4486_pad_0, pad_type = var_4486_pad_type_0, strides = var_4486_strides_0, weight = layers_2_self_attn_k_proj_weight_palettized, x = var_4401_cast_fp16)[name = string("op_4486")]; tensor var_4491 = const()[name = string("op_4491"), val = tensor([1, 1, 256, 1])]; tensor var_4492 = reshape(shape = var_4491, x = var_4486)[name = string("op_4492")]; tensor var_4497 = const()[name = string("op_4497"), val = tensor([0, 1, 3, 2])]; string var_4514_pad_type_0 = const()[name = string("op_4514_pad_type_0"), val = string("valid")]; tensor var_4514_strides_0 = const()[name = string("op_4514_strides_0"), val = tensor([1, 1])]; tensor var_4514_pad_0 = const()[name = string("op_4514_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4514_dilations_0 = const()[name = string("op_4514_dilations_0"), val = tensor([1, 1])]; int32 var_4514_groups_0 = const()[name = string("op_4514_groups_0"), val = int32(1)]; tensor var_4514 = conv(dilations = var_4514_dilations_0, groups = var_4514_groups_0, pad = var_4514_pad_0, pad_type = var_4514_pad_type_0, strides = var_4514_strides_0, weight = layers_2_self_attn_v_proj_weight_palettized, x = var_4401_cast_fp16)[name = string("op_4514")]; tensor var_4519 = const()[name = string("op_4519"), val = tensor([1, 1, 256, 1])]; tensor var_4520 = reshape(shape = var_4519, x = var_4514)[name = string("op_4520")]; tensor var_4525 = const()[name = string("op_4525"), val = tensor([0, 1, 3, 2])]; tensor var_4535 = const()[name = string("op_4535"), val = tensor([1, 1, 256])]; tensor var_4498 = transpose(perm = var_4497, x = var_4492)[name = string("transpose_334")]; tensor x_207 = reshape(shape = var_4535, x = var_4498)[name = string("x_207")]; int32 var_4541 = const()[name = string("op_4541"), val = int32(-1)]; fp16 const_115_promoted_to_fp16 = const()[name = string("const_115_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4547_cast_fp16 = mul(x = x_207, y = const_115_promoted_to_fp16)[name = string("op_4547_cast_fp16")]; bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)]; tensor input_139_cast_fp16 = concat(axis = var_4541, interleave = input_139_interleave_0, values = (x_207, var_4547_cast_fp16))[name = string("input_139_cast_fp16")]; tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; fp16 var_4539_to_fp16 = const()[name = string("op_4539_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_4539_to_fp16, x = input_139_cast_fp16)[name = string("normed_205_cast_fp16")]; tensor var_4552_split_sizes_0 = const()[name = string("op_4552_split_sizes_0"), val = tensor([256, 256])]; int32 var_4552_axis_0 = const()[name = string("op_4552_axis_0"), val = int32(-1)]; tensor var_4552_cast_fp16_0, tensor var_4552_cast_fp16_1 = split(axis = var_4552_axis_0, split_sizes = var_4552_split_sizes_0, x = normed_205_cast_fp16)[name = string("op_4552_cast_fp16")]; tensor const_116_to_fp16 = const()[name = string("const_116_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2253533120)))]; tensor var_4555_cast_fp16 = mul(x = var_4552_cast_fp16_0, y = const_116_to_fp16)[name = string("op_4555_cast_fp16")]; tensor var_4561 = const()[name = string("op_4561"), val = tensor([1, 1, 1, 256])]; tensor q_21 = reshape(shape = var_4561, x = var_4555_cast_fp16)[name = string("q_21")]; fp16 var_4568_promoted_to_fp16 = const()[name = string("op_4568_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4526 = transpose(perm = var_4525, x = var_4520)[name = string("transpose_333")]; tensor var_4569_cast_fp16 = pow(x = var_4526, y = var_4568_promoted_to_fp16)[name = string("op_4569_cast_fp16")]; tensor var_4574_axes_0 = const()[name = string("op_4574_axes_0"), val = tensor([-1])]; bool var_4574_keep_dims_0 = const()[name = string("op_4574_keep_dims_0"), val = bool(true)]; tensor var_4574_cast_fp16 = reduce_mean(axes = var_4574_axes_0, keep_dims = var_4574_keep_dims_0, x = var_4569_cast_fp16)[name = string("op_4574_cast_fp16")]; fp16 var_4576_to_fp16 = const()[name = string("op_4576_to_fp16"), val = fp16(0x1.1p-20)]; tensor mean_sq_5_cast_fp16 = add(x = var_4574_cast_fp16, y = var_4576_to_fp16)[name = string("mean_sq_5_cast_fp16")]; fp16 var_4583_to_fp16 = const()[name = string("op_4583_to_fp16"), val = fp16(-0x1p-1)]; tensor var_4584_cast_fp16 = pow(x = mean_sq_5_cast_fp16, y = var_4583_to_fp16)[name = string("op_4584_cast_fp16")]; tensor var_4585_cast_fp16 = mul(x = var_4526, y = var_4584_cast_fp16)[name = string("op_4585_cast_fp16")]; tensor var_4591 = mul(x = q_21, y = cos_1)[name = string("op_4591")]; tensor var_4592_split_sizes_0 = const()[name = string("op_4592_split_sizes_0"), val = tensor([128, 128])]; int32 var_4592_axis_0 = const()[name = string("op_4592_axis_0"), val = int32(-1)]; tensor var_4592_0, tensor var_4592_1 = split(axis = var_4592_axis_0, split_sizes = var_4592_split_sizes_0, x = q_21)[name = string("op_4592")]; fp16 const_117_promoted = const()[name = string("const_117_promoted"), val = fp16(-0x1p+0)]; tensor var_4594 = mul(x = var_4592_1, y = const_117_promoted)[name = string("op_4594")]; int32 var_4596 = const()[name = string("op_4596"), val = int32(-1)]; bool var_4597_interleave_0 = const()[name = string("op_4597_interleave_0"), val = bool(false)]; tensor var_4597 = concat(axis = var_4596, interleave = var_4597_interleave_0, values = (var_4594, var_4592_0))[name = string("op_4597")]; tensor var_4598 = mul(x = var_4597, y = sin_1)[name = string("op_4598")]; tensor input_141 = add(x = var_4591, y = var_4598)[name = string("input_141")]; tensor var_4603_begin_0 = const()[name = string("op_4603_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_4603_end_0 = const()[name = string("op_4603_end_0"), val = tensor([3, 1, 512, 512])]; tensor var_4603_end_mask_0 = const()[name = string("op_4603_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4603_squeeze_mask_0 = const()[name = string("op_4603_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_4603_cast_fp16 = slice_by_index(begin = var_4603_begin_0, end = var_4603_end_0, end_mask = var_4603_end_mask_0, squeeze_mask = var_4603_squeeze_mask_0, x = coreml_update_state_33)[name = string("op_4603_cast_fp16")]; tensor K_cache_5_axes_0 = const()[name = string("K_cache_5_axes_0"), val = tensor([0])]; tensor K_cache_5_cast_fp16 = expand_dims(axes = K_cache_5_axes_0, x = var_4603_cast_fp16)[name = string("K_cache_5_cast_fp16")]; tensor var_4608_begin_0 = const()[name = string("op_4608_begin_0"), val = tensor([37, 0, 0, 0])]; tensor var_4608_end_0 = const()[name = string("op_4608_end_0"), val = tensor([38, 1, 512, 512])]; tensor var_4608_end_mask_0 = const()[name = string("op_4608_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_4608_squeeze_mask_0 = const()[name = string("op_4608_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_4608_cast_fp16 = slice_by_index(begin = var_4608_begin_0, end = var_4608_end_0, end_mask = var_4608_end_mask_0, squeeze_mask = var_4608_squeeze_mask_0, x = coreml_update_state_33)[name = string("op_4608_cast_fp16")]; tensor V_cache_5_axes_0 = const()[name = string("V_cache_5_axes_0"), val = tensor([0])]; tensor V_cache_5_cast_fp16 = expand_dims(axes = V_cache_5_axes_0, x = var_4608_cast_fp16)[name = string("V_cache_5_cast_fp16")]; tensor k_padded_5_pad_0 = const()[name = string("k_padded_5_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string k_padded_5_mode_0 = const()[name = string("k_padded_5_mode_0"), val = string("constant")]; fp16 const_118_to_fp16 = const()[name = string("const_118_to_fp16"), val = fp16(0x0p+0)]; tensor k_padded_5_cast_fp16 = pad(constant_val = const_118_to_fp16, mode = k_padded_5_mode_0, pad = k_padded_5_pad_0, x = input_141)[name = string("k_padded_5_cast_fp16")]; tensor v_padded_5_pad_0 = const()[name = string("v_padded_5_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string v_padded_5_mode_0 = const()[name = string("v_padded_5_mode_0"), val = string("constant")]; fp16 const_119_to_fp16 = const()[name = string("const_119_to_fp16"), val = fp16(0x0p+0)]; tensor v_padded_5_cast_fp16 = pad(constant_val = const_119_to_fp16, mode = v_padded_5_mode_0, pad = v_padded_5_pad_0, x = var_4585_cast_fp16)[name = string("v_padded_5_cast_fp16")]; tensor var_4626_cast_fp16 = mul(x = K_cache_5_cast_fp16, y = var_3515_cast_fp16)[name = string("op_4626_cast_fp16")]; tensor var_4627_reps_0 = const()[name = string("op_4627_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_4627_cast_fp16 = tile(reps = var_4627_reps_0, x = k_padded_5_cast_fp16)[name = string("op_4627_cast_fp16")]; tensor var_4628_cast_fp16 = mul(x = var_4627_cast_fp16, y = update_mask)[name = string("op_4628_cast_fp16")]; tensor K_new_5_cast_fp16 = add(x = var_4626_cast_fp16, y = var_4628_cast_fp16)[name = string("K_new_5_cast_fp16")]; tensor var_4634_cast_fp16 = mul(x = V_cache_5_cast_fp16, y = var_3515_cast_fp16)[name = string("op_4634_cast_fp16")]; tensor var_4635_reps_0 = const()[name = string("op_4635_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_4635_cast_fp16 = tile(reps = var_4635_reps_0, x = v_padded_5_cast_fp16)[name = string("op_4635_cast_fp16")]; tensor var_4636_cast_fp16 = mul(x = var_4635_cast_fp16, y = update_mask)[name = string("op_4636_cast_fp16")]; tensor V_new_5_cast_fp16 = add(x = var_4634_cast_fp16, y = var_4636_cast_fp16)[name = string("V_new_5_cast_fp16")]; tensor var_4640_axes_0 = const()[name = string("op_4640_axes_0"), val = tensor([0])]; tensor var_4640_cast_fp16 = squeeze(axes = var_4640_axes_0, x = K_new_5_cast_fp16)[name = string("op_4640_cast_fp16")]; tensor concat_16 = const()[name = string("concat_16"), val = tensor([2, 0, 0, 0])]; tensor concat_17 = const()[name = string("concat_17"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_16, begin_mask = kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_17, end_mask = kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_5_stride_0, update = var_4640_cast_fp16, x = coreml_update_state_33)[name = string("kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_5_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_34_write_state")]; tensor coreml_update_state_34 = read_state(input = kv_cache_0)[name = string("coreml_update_state_34")]; tensor var_4647_axes_0 = const()[name = string("op_4647_axes_0"), val = tensor([0])]; tensor var_4647_cast_fp16 = squeeze(axes = var_4647_axes_0, x = V_new_5_cast_fp16)[name = string("op_4647_cast_fp16")]; tensor concat_18 = const()[name = string("concat_18"), val = tensor([37, 0, 0, 0])]; tensor concat_19 = const()[name = string("concat_19"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_18, begin_mask = kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_19, end_mask = kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_6_stride_0, update = var_4647_cast_fp16, x = coreml_update_state_34)[name = string("kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_6_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_35_write_state")]; tensor coreml_update_state_35 = read_state(input = kv_cache_0)[name = string("coreml_update_state_35")]; tensor K_for_attn_5_begin_0 = const()[name = string("K_for_attn_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor K_for_attn_5_end_0 = const()[name = string("K_for_attn_5_end_0"), val = tensor([1, 1, 512, 256])]; tensor K_for_attn_5_end_mask_0 = const()[name = string("K_for_attn_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor K_for_attn_5_cast_fp16 = slice_by_index(begin = K_for_attn_5_begin_0, end = K_for_attn_5_end_0, end_mask = K_for_attn_5_end_mask_0, x = K_new_5_cast_fp16)[name = string("K_for_attn_5_cast_fp16")]; tensor V_for_attn_5_begin_0 = const()[name = string("V_for_attn_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor V_for_attn_5_end_0 = const()[name = string("V_for_attn_5_end_0"), val = tensor([1, 1, 512, 256])]; tensor V_for_attn_5_end_mask_0 = const()[name = string("V_for_attn_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor V_for_attn_5_cast_fp16 = slice_by_index(begin = V_for_attn_5_begin_0, end = V_for_attn_5_end_0, end_mask = V_for_attn_5_end_mask_0, x = V_new_5_cast_fp16)[name = string("V_for_attn_5_cast_fp16")]; tensor transpose_8_perm_0 = const()[name = string("transpose_8_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_4_reps_0 = const()[name = string("tile_4_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_8_cast_fp16 = transpose(perm = transpose_8_perm_0, x = K_for_attn_5_cast_fp16)[name = string("transpose_332")]; tensor tile_4_cast_fp16 = tile(reps = tile_4_reps_0, x = transpose_8_cast_fp16)[name = string("tile_4_cast_fp16")]; tensor concat_20 = const()[name = string("concat_20"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_8_cast_fp16 = reshape(shape = concat_20, x = tile_4_cast_fp16)[name = string("reshape_8_cast_fp16")]; tensor transpose_9_perm_0 = const()[name = string("transpose_9_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_21 = const()[name = string("concat_21"), val = tensor([-1, 1, 512, 256])]; tensor transpose_9_cast_fp16 = transpose(perm = transpose_9_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_331")]; tensor reshape_9_cast_fp16 = reshape(shape = concat_21, x = transpose_9_cast_fp16)[name = string("reshape_9_cast_fp16")]; tensor transpose_142_perm_0 = const()[name = string("transpose_142_perm_0"), val = tensor([1, 0, -1, -2])]; tensor transpose_10_perm_0 = const()[name = string("transpose_10_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_5_reps_0 = const()[name = string("tile_5_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_10_cast_fp16 = transpose(perm = transpose_10_perm_0, x = V_for_attn_5_cast_fp16)[name = string("transpose_330")]; tensor tile_5_cast_fp16 = tile(reps = tile_5_reps_0, x = transpose_10_cast_fp16)[name = string("tile_5_cast_fp16")]; tensor concat_22 = const()[name = string("concat_22"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_10_cast_fp16 = reshape(shape = concat_22, x = tile_5_cast_fp16)[name = string("reshape_10_cast_fp16")]; tensor transpose_11_perm_0 = const()[name = string("transpose_11_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_23 = const()[name = string("concat_23"), val = tensor([-1, 1, 512, 256])]; tensor transpose_11_cast_fp16 = transpose(perm = transpose_11_perm_0, x = reshape_10_cast_fp16)[name = string("transpose_329")]; tensor reshape_11_cast_fp16 = reshape(shape = concat_23, x = transpose_11_cast_fp16)[name = string("reshape_11_cast_fp16")]; tensor V_expanded_5_perm_0 = const()[name = string("V_expanded_5_perm_0"), val = tensor([1, 0, -2, -1])]; bool var_4684_transpose_x_0 = const()[name = string("op_4684_transpose_x_0"), val = bool(false)]; bool var_4684_transpose_y_0 = const()[name = string("op_4684_transpose_y_0"), val = bool(false)]; tensor transpose_142_cast_fp16 = transpose(perm = transpose_142_perm_0, x = reshape_9_cast_fp16)[name = string("transpose_328")]; tensor var_4684_cast_fp16 = matmul(transpose_x = var_4684_transpose_x_0, transpose_y = var_4684_transpose_y_0, x = q_23, y = transpose_142_cast_fp16)[name = string("op_4684_cast_fp16")]; tensor attn_weights_15_cast_fp16 = add(x = var_4684_cast_fp16, y = causal_mask)[name = string("attn_weights_15_cast_fp16")]; int32 var_4694 = const()[name = string("op_4694"), val = int32(-1)]; tensor var_4696_cast_fp16 = softmax(axis = var_4694, x = attn_weights_15_cast_fp16)[name = string("op_4696_cast_fp16")]; bool var_4712_transpose_x_0 = const()[name = string("op_4712_transpose_x_0"), val = bool(false)]; bool var_4712_transpose_y_0 = const()[name = string("op_4712_transpose_y_0"), val = bool(false)]; tensor V_expanded_5_cast_fp16 = transpose(perm = V_expanded_5_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_327")]; tensor var_4712_cast_fp16 = matmul(transpose_x = var_4712_transpose_x_0, transpose_y = var_4712_transpose_y_0, x = var_4696_cast_fp16, y = V_expanded_5_cast_fp16)[name = string("op_4712_cast_fp16")]; tensor var_4722 = const()[name = string("op_4722"), val = tensor([0, 2, 1, 3])]; tensor var_4729 = const()[name = string("op_4729"), val = tensor([1, 1, -1])]; tensor var_4723 = transpose(perm = var_4722, x = var_4712_cast_fp16)[name = string("transpose_326")]; tensor attn_output_15 = reshape(shape = var_4729, x = var_4723)[name = string("attn_output_15")]; tensor var_4734 = const()[name = string("op_4734"), val = tensor([0, 2, 1])]; tensor squeeze_2_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2253533696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2255106624))))[name = string("squeeze_2_palettized")]; string var_4750_pad_type_0 = const()[name = string("op_4750_pad_type_0"), val = string("valid")]; int32 var_4750_groups_0 = const()[name = string("op_4750_groups_0"), val = int32(1)]; tensor var_4750_strides_0 = const()[name = string("op_4750_strides_0"), val = tensor([1])]; tensor var_4750_pad_0 = const()[name = string("op_4750_pad_0"), val = tensor([0, 0])]; tensor var_4750_dilations_0 = const()[name = string("op_4750_dilations_0"), val = tensor([1])]; tensor var_4735 = transpose(perm = var_4734, x = attn_output_15)[name = string("transpose_325")]; tensor var_4750 = conv(dilations = var_4750_dilations_0, groups = var_4750_groups_0, pad = var_4750_pad_0, pad_type = var_4750_pad_type_0, strides = var_4750_strides_0, weight = squeeze_2_palettized, x = var_4735)[name = string("op_4750")]; tensor var_4754 = const()[name = string("op_4754"), val = tensor([0, 2, 1])]; int32 var_4760 = const()[name = string("op_4760"), val = int32(-1)]; fp16 const_120_promoted_to_fp16 = const()[name = string("const_120_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_213 = transpose(perm = var_4754, x = var_4750)[name = string("transpose_324")]; tensor var_4766_cast_fp16 = mul(x = x_213, y = const_120_promoted_to_fp16)[name = string("op_4766_cast_fp16")]; bool input_147_interleave_0 = const()[name = string("input_147_interleave_0"), val = bool(false)]; tensor input_147_cast_fp16 = concat(axis = var_4760, interleave = input_147_interleave_0, values = (x_213, var_4766_cast_fp16))[name = string("input_147_cast_fp16")]; tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; fp16 var_4758_to_fp16 = const()[name = string("op_4758_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_4758_to_fp16, x = input_147_cast_fp16)[name = string("normed_209_cast_fp16")]; tensor var_4771_split_sizes_0 = const()[name = string("op_4771_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_4771_axis_0 = const()[name = string("op_4771_axis_0"), val = int32(-1)]; tensor var_4771_cast_fp16_0, tensor var_4771_cast_fp16_1 = split(axis = var_4771_axis_0, split_sizes = var_4771_split_sizes_0, x = normed_209_cast_fp16)[name = string("op_4771_cast_fp16")]; tensor const_121_to_fp16 = const()[name = string("const_121_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2255108224)))]; tensor var_4774_cast_fp16 = mul(x = var_4771_cast_fp16_0, y = const_121_to_fp16)[name = string("op_4774_cast_fp16")]; tensor x_217_cast_fp16 = add(x = x_199_cast_fp16, y = var_4774_cast_fp16)[name = string("x_217_cast_fp16")]; int32 var_4781 = const()[name = string("op_4781"), val = int32(-1)]; fp16 const_122_promoted_to_fp16 = const()[name = string("const_122_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4787_cast_fp16 = mul(x = x_217_cast_fp16, y = const_122_promoted_to_fp16)[name = string("op_4787_cast_fp16")]; bool input_149_interleave_0 = const()[name = string("input_149_interleave_0"), val = bool(false)]; tensor input_149_cast_fp16 = concat(axis = var_4781, interleave = input_149_interleave_0, values = (x_217_cast_fp16, var_4787_cast_fp16))[name = string("input_149_cast_fp16")]; tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; fp16 var_4779_to_fp16 = const()[name = string("op_4779_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_4779_to_fp16, x = input_149_cast_fp16)[name = string("normed_213_cast_fp16")]; tensor var_4792_split_sizes_0 = const()[name = string("op_4792_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_4792_axis_0 = const()[name = string("op_4792_axis_0"), val = int32(-1)]; tensor var_4792_cast_fp16_0, tensor var_4792_cast_fp16_1 = split(axis = var_4792_axis_0, split_sizes = var_4792_split_sizes_0, x = normed_213_cast_fp16)[name = string("op_4792_cast_fp16")]; tensor const_123_to_fp16 = const()[name = string("const_123_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2255111360)))]; tensor var_4795_cast_fp16 = mul(x = var_4792_cast_fp16_0, y = const_123_to_fp16)[name = string("op_4795_cast_fp16")]; tensor var_4808 = const()[name = string("op_4808"), val = tensor([0, 2, 1])]; tensor input_151_axes_0 = const()[name = string("input_151_axes_0"), val = tensor([2])]; tensor var_4809 = transpose(perm = var_4808, x = var_4795_cast_fp16)[name = string("transpose_323")]; tensor input_151 = expand_dims(axes = input_151_axes_0, x = var_4809)[name = string("input_151")]; string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = string("valid")]; tensor gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor([1, 1])]; tensor gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_9_dilations_0 = const()[name = string("gate_9_dilations_0"), val = tensor([1, 1])]; int32 gate_9_groups_0 = const()[name = string("gate_9_groups_0"), val = int32(1)]; tensor gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_151)[name = string("gate_9")]; string up_5_pad_type_0 = const()[name = string("up_5_pad_type_0"), val = string("valid")]; tensor up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor([1, 1])]; tensor up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor([1, 1])]; int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)]; tensor up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_151)[name = string("up_5")]; string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")]; tensor input_153 = mul(x = gate_11, y = up_5)[name = string("input_153")]; string mlp_out_5_pad_type_0 = const()[name = string("mlp_out_5_pad_type_0"), val = string("valid")]; tensor mlp_out_5_strides_0 = const()[name = string("mlp_out_5_strides_0"), val = tensor([1, 1])]; tensor mlp_out_5_pad_0 = const()[name = string("mlp_out_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_5_dilations_0 = const()[name = string("mlp_out_5_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_5_groups_0 = const()[name = string("mlp_out_5_groups_0"), val = int32(1)]; tensor mlp_out_5 = conv(dilations = mlp_out_5_dilations_0, groups = mlp_out_5_groups_0, pad = mlp_out_5_pad_0, pad_type = mlp_out_5_pad_type_0, strides = mlp_out_5_strides_0, weight = layers_2_mlp_down_proj_weight_palettized, x = input_153)[name = string("mlp_out_5")]; tensor var_4849_axes_0 = const()[name = string("op_4849_axes_0"), val = tensor([2])]; tensor var_4849 = squeeze(axes = var_4849_axes_0, x = mlp_out_5)[name = string("op_4849")]; tensor var_4853 = const()[name = string("op_4853"), val = tensor([0, 2, 1])]; int32 var_4859 = const()[name = string("op_4859"), val = int32(-1)]; fp16 const_124_promoted_to_fp16 = const()[name = string("const_124_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_221 = transpose(perm = var_4853, x = var_4849)[name = string("transpose_322")]; tensor var_4865_cast_fp16 = mul(x = x_221, y = const_124_promoted_to_fp16)[name = string("op_4865_cast_fp16")]; bool input_155_interleave_0 = const()[name = string("input_155_interleave_0"), val = bool(false)]; tensor input_155_cast_fp16 = concat(axis = var_4859, interleave = input_155_interleave_0, values = (x_221, var_4865_cast_fp16))[name = string("input_155_cast_fp16")]; tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; fp16 var_4857_to_fp16 = const()[name = string("op_4857_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_4857_to_fp16, x = input_155_cast_fp16)[name = string("normed_217_cast_fp16")]; tensor var_4870_split_sizes_0 = const()[name = string("op_4870_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_4870_axis_0 = const()[name = string("op_4870_axis_0"), val = int32(-1)]; tensor var_4870_cast_fp16_0, tensor var_4870_cast_fp16_1 = split(axis = var_4870_axis_0, split_sizes = var_4870_split_sizes_0, x = normed_217_cast_fp16)[name = string("op_4870_cast_fp16")]; tensor const_125_to_fp16 = const()[name = string("const_125_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2255114496)))]; tensor var_4873_cast_fp16 = mul(x = var_4870_cast_fp16_0, y = const_125_to_fp16)[name = string("op_4873_cast_fp16")]; tensor hidden_states_37_cast_fp16 = add(x = x_217_cast_fp16, y = var_4873_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; tensor per_layer_slice_5_begin_0 = const()[name = string("per_layer_slice_5_begin_0"), val = tensor([0, 0, 512])]; tensor per_layer_slice_5_end_0 = const()[name = string("per_layer_slice_5_end_0"), val = tensor([1, 1, 768])]; tensor per_layer_slice_5_end_mask_0 = const()[name = string("per_layer_slice_5_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_5 = slice_by_index(begin = per_layer_slice_5_begin_0, end = per_layer_slice_5_end_0, end_mask = per_layer_slice_5_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_5")]; tensor gated_9 = linear(bias = linear_1_bias_0, weight = layers_2_per_layer_input_gate_weight_palettized, x = hidden_states_37_cast_fp16)[name = string("linear_5")]; string gated_11_mode_0 = const()[name = string("gated_11_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_11 = gelu(mode = gated_11_mode_0, x = gated_9)[name = string("gated_11")]; tensor input_159 = mul(x = gated_11, y = per_layer_slice_5)[name = string("input_159")]; tensor x_225 = linear(bias = linear_2_bias_0, weight = layers_2_per_layer_projection_weight_palettized, x = input_159)[name = string("linear_6")]; int32 var_4910 = const()[name = string("op_4910"), val = int32(-1)]; fp16 const_126_promoted_to_fp16 = const()[name = string("const_126_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4916_cast_fp16 = mul(x = x_225, y = const_126_promoted_to_fp16)[name = string("op_4916_cast_fp16")]; bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; tensor input_161_cast_fp16 = concat(axis = var_4910, interleave = input_161_interleave_0, values = (x_225, var_4916_cast_fp16))[name = string("input_161_cast_fp16")]; tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; fp16 var_4908_to_fp16 = const()[name = string("op_4908_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_4908_to_fp16, x = input_161_cast_fp16)[name = string("normed_221_cast_fp16")]; tensor var_4921_split_sizes_0 = const()[name = string("op_4921_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_4921_axis_0 = const()[name = string("op_4921_axis_0"), val = int32(-1)]; tensor var_4921_cast_fp16_0, tensor var_4921_cast_fp16_1 = split(axis = var_4921_axis_0, split_sizes = var_4921_split_sizes_0, x = normed_221_cast_fp16)[name = string("op_4921_cast_fp16")]; tensor const_127_to_fp16 = const()[name = string("const_127_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2255117632)))]; tensor var_4924_cast_fp16 = mul(x = var_4921_cast_fp16_0, y = const_127_to_fp16)[name = string("op_4924_cast_fp16")]; tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_4924_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor layers_2_layer_scalar_to_fp16 = const()[name = string("layers_2_layer_scalar_to_fp16"), val = tensor([0x1.96p-1])]; tensor x_229_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = layers_2_layer_scalar_to_fp16)[name = string("x_229_cast_fp16")]; int32 var_4932 = const()[name = string("op_4932"), val = int32(-1)]; fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4938_cast_fp16 = mul(x = x_229_cast_fp16, y = const_128_promoted_to_fp16)[name = string("op_4938_cast_fp16")]; bool input_163_interleave_0 = const()[name = string("input_163_interleave_0"), val = bool(false)]; tensor input_163_cast_fp16 = concat(axis = var_4932, interleave = input_163_interleave_0, values = (x_229_cast_fp16, var_4938_cast_fp16))[name = string("input_163_cast_fp16")]; tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; fp16 var_4930_to_fp16 = const()[name = string("op_4930_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_4930_to_fp16, x = input_163_cast_fp16)[name = string("normed_225_cast_fp16")]; tensor var_4943_split_sizes_0 = const()[name = string("op_4943_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_4943_axis_0 = const()[name = string("op_4943_axis_0"), val = int32(-1)]; tensor var_4943_cast_fp16_0, tensor var_4943_cast_fp16_1 = split(axis = var_4943_axis_0, split_sizes = var_4943_split_sizes_0, x = normed_225_cast_fp16)[name = string("op_4943_cast_fp16")]; tensor const_129_to_fp16 = const()[name = string("const_129_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2255120768)))]; tensor var_4946_cast_fp16 = mul(x = var_4943_cast_fp16_0, y = const_129_to_fp16)[name = string("op_4946_cast_fp16")]; tensor var_4954 = const()[name = string("op_4954"), val = tensor([0, 2, 1])]; tensor var_4957_axes_0 = const()[name = string("op_4957_axes_0"), val = tensor([2])]; tensor var_4955_cast_fp16 = transpose(perm = var_4954, x = var_4946_cast_fp16)[name = string("transpose_321")]; tensor var_4957_cast_fp16 = expand_dims(axes = var_4957_axes_0, x = var_4955_cast_fp16)[name = string("op_4957_cast_fp16")]; string var_4973_pad_type_0 = const()[name = string("op_4973_pad_type_0"), val = string("valid")]; tensor var_4973_strides_0 = const()[name = string("op_4973_strides_0"), val = tensor([1, 1])]; tensor var_4973_pad_0 = const()[name = string("op_4973_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4973_dilations_0 = const()[name = string("op_4973_dilations_0"), val = tensor([1, 1])]; int32 var_4973_groups_0 = const()[name = string("op_4973_groups_0"), val = int32(1)]; tensor var_4973 = conv(dilations = var_4973_dilations_0, groups = var_4973_groups_0, pad = var_4973_pad_0, pad_type = var_4973_pad_type_0, strides = var_4973_strides_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = var_4957_cast_fp16)[name = string("op_4973")]; tensor var_4978 = const()[name = string("op_4978"), val = tensor([1, 8, 256, 1])]; tensor var_4979 = reshape(shape = var_4978, x = var_4973)[name = string("op_4979")]; tensor var_4984 = const()[name = string("op_4984"), val = tensor([0, 1, 3, 2])]; tensor var_4994 = const()[name = string("op_4994"), val = tensor([1, 8, 256])]; tensor var_4985 = transpose(perm = var_4984, x = var_4979)[name = string("transpose_320")]; tensor x_233 = reshape(shape = var_4994, x = var_4985)[name = string("x_233")]; int32 var_5000 = const()[name = string("op_5000"), val = int32(-1)]; fp16 const_130_promoted_to_fp16 = const()[name = string("const_130_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5006_cast_fp16 = mul(x = x_233, y = const_130_promoted_to_fp16)[name = string("op_5006_cast_fp16")]; bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; tensor input_167_cast_fp16 = concat(axis = var_5000, interleave = input_167_interleave_0, values = (x_233, var_5006_cast_fp16))[name = string("input_167_cast_fp16")]; tensor normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor([-1])]; fp16 var_4998_to_fp16 = const()[name = string("op_4998_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_4998_to_fp16, x = input_167_cast_fp16)[name = string("normed_229_cast_fp16")]; tensor var_5011_split_sizes_0 = const()[name = string("op_5011_split_sizes_0"), val = tensor([256, 256])]; int32 var_5011_axis_0 = const()[name = string("op_5011_axis_0"), val = int32(-1)]; tensor var_5011_cast_fp16_0, tensor var_5011_cast_fp16_1 = split(axis = var_5011_axis_0, split_sizes = var_5011_split_sizes_0, x = normed_229_cast_fp16)[name = string("op_5011_cast_fp16")]; tensor const_131_to_fp16 = const()[name = string("const_131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2255123904)))]; tensor var_5014_cast_fp16 = mul(x = var_5011_cast_fp16_0, y = const_131_to_fp16)[name = string("op_5014_cast_fp16")]; tensor var_5020 = const()[name = string("op_5020"), val = tensor([1, 8, 1, 256])]; tensor q_27 = reshape(shape = var_5020, x = var_5014_cast_fp16)[name = string("q_27")]; tensor var_5022 = mul(x = q_27, y = cos_1)[name = string("op_5022")]; tensor var_5023_split_sizes_0 = const()[name = string("op_5023_split_sizes_0"), val = tensor([128, 128])]; int32 var_5023_axis_0 = const()[name = string("op_5023_axis_0"), val = int32(-1)]; tensor var_5023_0, tensor var_5023_1 = split(axis = var_5023_axis_0, split_sizes = var_5023_split_sizes_0, x = q_27)[name = string("op_5023")]; fp16 const_132_promoted = const()[name = string("const_132_promoted"), val = fp16(-0x1p+0)]; tensor var_5025 = mul(x = var_5023_1, y = const_132_promoted)[name = string("op_5025")]; int32 var_5027 = const()[name = string("op_5027"), val = int32(-1)]; bool var_5028_interleave_0 = const()[name = string("op_5028_interleave_0"), val = bool(false)]; tensor var_5028 = concat(axis = var_5027, interleave = var_5028_interleave_0, values = (var_5025, var_5023_0))[name = string("op_5028")]; tensor var_5029 = mul(x = var_5028, y = sin_1)[name = string("op_5029")]; tensor q_31 = add(x = var_5022, y = var_5029)[name = string("q_31")]; string var_5042_pad_type_0 = const()[name = string("op_5042_pad_type_0"), val = string("valid")]; tensor var_5042_strides_0 = const()[name = string("op_5042_strides_0"), val = tensor([1, 1])]; tensor var_5042_pad_0 = const()[name = string("op_5042_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5042_dilations_0 = const()[name = string("op_5042_dilations_0"), val = tensor([1, 1])]; int32 var_5042_groups_0 = const()[name = string("op_5042_groups_0"), val = int32(1)]; tensor var_5042 = conv(dilations = var_5042_dilations_0, groups = var_5042_groups_0, pad = var_5042_pad_0, pad_type = var_5042_pad_type_0, strides = var_5042_strides_0, weight = layers_3_self_attn_k_proj_weight_palettized, x = var_4957_cast_fp16)[name = string("op_5042")]; tensor var_5047 = const()[name = string("op_5047"), val = tensor([1, 1, 256, 1])]; tensor var_5048 = reshape(shape = var_5047, x = var_5042)[name = string("op_5048")]; tensor var_5053 = const()[name = string("op_5053"), val = tensor([0, 1, 3, 2])]; string var_5070_pad_type_0 = const()[name = string("op_5070_pad_type_0"), val = string("valid")]; tensor var_5070_strides_0 = const()[name = string("op_5070_strides_0"), val = tensor([1, 1])]; tensor var_5070_pad_0 = const()[name = string("op_5070_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5070_dilations_0 = const()[name = string("op_5070_dilations_0"), val = tensor([1, 1])]; int32 var_5070_groups_0 = const()[name = string("op_5070_groups_0"), val = int32(1)]; tensor var_5070 = conv(dilations = var_5070_dilations_0, groups = var_5070_groups_0, pad = var_5070_pad_0, pad_type = var_5070_pad_type_0, strides = var_5070_strides_0, weight = layers_3_self_attn_v_proj_weight_palettized, x = var_4957_cast_fp16)[name = string("op_5070")]; tensor var_5075 = const()[name = string("op_5075"), val = tensor([1, 1, 256, 1])]; tensor var_5076 = reshape(shape = var_5075, x = var_5070)[name = string("op_5076")]; tensor var_5081 = const()[name = string("op_5081"), val = tensor([0, 1, 3, 2])]; tensor var_5091 = const()[name = string("op_5091"), val = tensor([1, 1, 256])]; tensor var_5054 = transpose(perm = var_5053, x = var_5048)[name = string("transpose_319")]; tensor x_237 = reshape(shape = var_5091, x = var_5054)[name = string("x_237")]; int32 var_5097 = const()[name = string("op_5097"), val = int32(-1)]; fp16 const_133_promoted_to_fp16 = const()[name = string("const_133_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5103_cast_fp16 = mul(x = x_237, y = const_133_promoted_to_fp16)[name = string("op_5103_cast_fp16")]; bool input_169_interleave_0 = const()[name = string("input_169_interleave_0"), val = bool(false)]; tensor input_169_cast_fp16 = concat(axis = var_5097, interleave = input_169_interleave_0, values = (x_237, var_5103_cast_fp16))[name = string("input_169_cast_fp16")]; tensor normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor([-1])]; fp16 var_5095_to_fp16 = const()[name = string("op_5095_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_5095_to_fp16, x = input_169_cast_fp16)[name = string("normed_233_cast_fp16")]; tensor var_5108_split_sizes_0 = const()[name = string("op_5108_split_sizes_0"), val = tensor([256, 256])]; int32 var_5108_axis_0 = const()[name = string("op_5108_axis_0"), val = int32(-1)]; tensor var_5108_cast_fp16_0, tensor var_5108_cast_fp16_1 = split(axis = var_5108_axis_0, split_sizes = var_5108_split_sizes_0, x = normed_233_cast_fp16)[name = string("op_5108_cast_fp16")]; tensor const_134_to_fp16 = const()[name = string("const_134_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2255124480)))]; tensor var_5111_cast_fp16 = mul(x = var_5108_cast_fp16_0, y = const_134_to_fp16)[name = string("op_5111_cast_fp16")]; tensor var_5117 = const()[name = string("op_5117"), val = tensor([1, 1, 1, 256])]; tensor q_29 = reshape(shape = var_5117, x = var_5111_cast_fp16)[name = string("q_29")]; fp16 var_5124_promoted_to_fp16 = const()[name = string("op_5124_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5082 = transpose(perm = var_5081, x = var_5076)[name = string("transpose_318")]; tensor var_5125_cast_fp16 = pow(x = var_5082, y = var_5124_promoted_to_fp16)[name = string("op_5125_cast_fp16")]; tensor var_5130_axes_0 = const()[name = string("op_5130_axes_0"), val = tensor([-1])]; bool var_5130_keep_dims_0 = const()[name = string("op_5130_keep_dims_0"), val = bool(true)]; tensor var_5130_cast_fp16 = reduce_mean(axes = var_5130_axes_0, keep_dims = var_5130_keep_dims_0, x = var_5125_cast_fp16)[name = string("op_5130_cast_fp16")]; fp16 var_5132_to_fp16 = const()[name = string("op_5132_to_fp16"), val = fp16(0x1.1p-20)]; tensor mean_sq_7_cast_fp16 = add(x = var_5130_cast_fp16, y = var_5132_to_fp16)[name = string("mean_sq_7_cast_fp16")]; fp16 var_5139_to_fp16 = const()[name = string("op_5139_to_fp16"), val = fp16(-0x1p-1)]; tensor var_5140_cast_fp16 = pow(x = mean_sq_7_cast_fp16, y = var_5139_to_fp16)[name = string("op_5140_cast_fp16")]; tensor var_5141_cast_fp16 = mul(x = var_5082, y = var_5140_cast_fp16)[name = string("op_5141_cast_fp16")]; tensor var_5147 = mul(x = q_29, y = cos_1)[name = string("op_5147")]; tensor var_5148_split_sizes_0 = const()[name = string("op_5148_split_sizes_0"), val = tensor([128, 128])]; int32 var_5148_axis_0 = const()[name = string("op_5148_axis_0"), val = int32(-1)]; tensor var_5148_0, tensor var_5148_1 = split(axis = var_5148_axis_0, split_sizes = var_5148_split_sizes_0, x = q_29)[name = string("op_5148")]; fp16 const_135_promoted = const()[name = string("const_135_promoted"), val = fp16(-0x1p+0)]; tensor var_5150 = mul(x = var_5148_1, y = const_135_promoted)[name = string("op_5150")]; int32 var_5152 = const()[name = string("op_5152"), val = int32(-1)]; bool var_5153_interleave_0 = const()[name = string("op_5153_interleave_0"), val = bool(false)]; tensor var_5153 = concat(axis = var_5152, interleave = var_5153_interleave_0, values = (var_5150, var_5148_0))[name = string("op_5153")]; tensor var_5154 = mul(x = var_5153, y = sin_1)[name = string("op_5154")]; tensor input_171 = add(x = var_5147, y = var_5154)[name = string("input_171")]; tensor var_5159_begin_0 = const()[name = string("op_5159_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_5159_end_0 = const()[name = string("op_5159_end_0"), val = tensor([4, 1, 512, 512])]; tensor var_5159_end_mask_0 = const()[name = string("op_5159_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5159_squeeze_mask_0 = const()[name = string("op_5159_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_5159_cast_fp16 = slice_by_index(begin = var_5159_begin_0, end = var_5159_end_0, end_mask = var_5159_end_mask_0, squeeze_mask = var_5159_squeeze_mask_0, x = coreml_update_state_35)[name = string("op_5159_cast_fp16")]; tensor K_cache_7_axes_0 = const()[name = string("K_cache_7_axes_0"), val = tensor([0])]; tensor K_cache_7_cast_fp16 = expand_dims(axes = K_cache_7_axes_0, x = var_5159_cast_fp16)[name = string("K_cache_7_cast_fp16")]; tensor var_5164_begin_0 = const()[name = string("op_5164_begin_0"), val = tensor([38, 0, 0, 0])]; tensor var_5164_end_0 = const()[name = string("op_5164_end_0"), val = tensor([39, 1, 512, 512])]; tensor var_5164_end_mask_0 = const()[name = string("op_5164_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5164_squeeze_mask_0 = const()[name = string("op_5164_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_5164_cast_fp16 = slice_by_index(begin = var_5164_begin_0, end = var_5164_end_0, end_mask = var_5164_end_mask_0, squeeze_mask = var_5164_squeeze_mask_0, x = coreml_update_state_35)[name = string("op_5164_cast_fp16")]; tensor V_cache_7_axes_0 = const()[name = string("V_cache_7_axes_0"), val = tensor([0])]; tensor V_cache_7_cast_fp16 = expand_dims(axes = V_cache_7_axes_0, x = var_5164_cast_fp16)[name = string("V_cache_7_cast_fp16")]; tensor k_padded_7_pad_0 = const()[name = string("k_padded_7_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string k_padded_7_mode_0 = const()[name = string("k_padded_7_mode_0"), val = string("constant")]; fp16 const_136_to_fp16 = const()[name = string("const_136_to_fp16"), val = fp16(0x0p+0)]; tensor k_padded_7_cast_fp16 = pad(constant_val = const_136_to_fp16, mode = k_padded_7_mode_0, pad = k_padded_7_pad_0, x = input_171)[name = string("k_padded_7_cast_fp16")]; tensor v_padded_7_pad_0 = const()[name = string("v_padded_7_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string v_padded_7_mode_0 = const()[name = string("v_padded_7_mode_0"), val = string("constant")]; fp16 const_137_to_fp16 = const()[name = string("const_137_to_fp16"), val = fp16(0x0p+0)]; tensor v_padded_7_cast_fp16 = pad(constant_val = const_137_to_fp16, mode = v_padded_7_mode_0, pad = v_padded_7_pad_0, x = var_5141_cast_fp16)[name = string("v_padded_7_cast_fp16")]; tensor var_5182_cast_fp16 = mul(x = K_cache_7_cast_fp16, y = var_3515_cast_fp16)[name = string("op_5182_cast_fp16")]; tensor var_5183_reps_0 = const()[name = string("op_5183_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_5183_cast_fp16 = tile(reps = var_5183_reps_0, x = k_padded_7_cast_fp16)[name = string("op_5183_cast_fp16")]; tensor var_5184_cast_fp16 = mul(x = var_5183_cast_fp16, y = update_mask)[name = string("op_5184_cast_fp16")]; tensor K_new_7_cast_fp16 = add(x = var_5182_cast_fp16, y = var_5184_cast_fp16)[name = string("K_new_7_cast_fp16")]; tensor var_5190_cast_fp16 = mul(x = V_cache_7_cast_fp16, y = var_3515_cast_fp16)[name = string("op_5190_cast_fp16")]; tensor var_5191_reps_0 = const()[name = string("op_5191_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_5191_cast_fp16 = tile(reps = var_5191_reps_0, x = v_padded_7_cast_fp16)[name = string("op_5191_cast_fp16")]; tensor var_5192_cast_fp16 = mul(x = var_5191_cast_fp16, y = update_mask)[name = string("op_5192_cast_fp16")]; tensor V_new_7_cast_fp16 = add(x = var_5190_cast_fp16, y = var_5192_cast_fp16)[name = string("V_new_7_cast_fp16")]; tensor var_5196_axes_0 = const()[name = string("op_5196_axes_0"), val = tensor([0])]; tensor var_5196_cast_fp16 = squeeze(axes = var_5196_axes_0, x = K_new_7_cast_fp16)[name = string("op_5196_cast_fp16")]; tensor concat_24 = const()[name = string("concat_24"), val = tensor([3, 0, 0, 0])]; tensor concat_25 = const()[name = string("concat_25"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_24, begin_mask = kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_25, end_mask = kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_7_stride_0, update = var_5196_cast_fp16, x = coreml_update_state_35)[name = string("kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_7_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_36_write_state")]; tensor coreml_update_state_36 = read_state(input = kv_cache_0)[name = string("coreml_update_state_36")]; tensor var_5203_axes_0 = const()[name = string("op_5203_axes_0"), val = tensor([0])]; tensor var_5203_cast_fp16 = squeeze(axes = var_5203_axes_0, x = V_new_7_cast_fp16)[name = string("op_5203_cast_fp16")]; tensor concat_26 = const()[name = string("concat_26"), val = tensor([38, 0, 0, 0])]; tensor concat_27 = const()[name = string("concat_27"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_26, begin_mask = kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_27, end_mask = kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_8_stride_0, update = var_5203_cast_fp16, x = coreml_update_state_36)[name = string("kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_8_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_37_write_state")]; tensor coreml_update_state_37 = read_state(input = kv_cache_0)[name = string("coreml_update_state_37")]; tensor K_for_attn_7_begin_0 = const()[name = string("K_for_attn_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor K_for_attn_7_end_0 = const()[name = string("K_for_attn_7_end_0"), val = tensor([1, 1, 512, 256])]; tensor K_for_attn_7_end_mask_0 = const()[name = string("K_for_attn_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor K_for_attn_7_cast_fp16 = slice_by_index(begin = K_for_attn_7_begin_0, end = K_for_attn_7_end_0, end_mask = K_for_attn_7_end_mask_0, x = K_new_7_cast_fp16)[name = string("K_for_attn_7_cast_fp16")]; tensor V_for_attn_7_begin_0 = const()[name = string("V_for_attn_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor V_for_attn_7_end_0 = const()[name = string("V_for_attn_7_end_0"), val = tensor([1, 1, 512, 256])]; tensor V_for_attn_7_end_mask_0 = const()[name = string("V_for_attn_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor V_for_attn_7_cast_fp16 = slice_by_index(begin = V_for_attn_7_begin_0, end = V_for_attn_7_end_0, end_mask = V_for_attn_7_end_mask_0, x = V_new_7_cast_fp16)[name = string("V_for_attn_7_cast_fp16")]; tensor transpose_12_perm_0 = const()[name = string("transpose_12_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_6_reps_0 = const()[name = string("tile_6_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_12_cast_fp16 = transpose(perm = transpose_12_perm_0, x = K_for_attn_7_cast_fp16)[name = string("transpose_317")]; tensor tile_6_cast_fp16 = tile(reps = tile_6_reps_0, x = transpose_12_cast_fp16)[name = string("tile_6_cast_fp16")]; tensor concat_28 = const()[name = string("concat_28"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_12_cast_fp16 = reshape(shape = concat_28, x = tile_6_cast_fp16)[name = string("reshape_12_cast_fp16")]; tensor transpose_13_perm_0 = const()[name = string("transpose_13_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_29 = const()[name = string("concat_29"), val = tensor([-1, 1, 512, 256])]; tensor transpose_13_cast_fp16 = transpose(perm = transpose_13_perm_0, x = reshape_12_cast_fp16)[name = string("transpose_316")]; tensor reshape_13_cast_fp16 = reshape(shape = concat_29, x = transpose_13_cast_fp16)[name = string("reshape_13_cast_fp16")]; tensor transpose_143_perm_0 = const()[name = string("transpose_143_perm_0"), val = tensor([1, 0, -1, -2])]; tensor transpose_14_perm_0 = const()[name = string("transpose_14_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_7_reps_0 = const()[name = string("tile_7_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_14_cast_fp16 = transpose(perm = transpose_14_perm_0, x = V_for_attn_7_cast_fp16)[name = string("transpose_315")]; tensor tile_7_cast_fp16 = tile(reps = tile_7_reps_0, x = transpose_14_cast_fp16)[name = string("tile_7_cast_fp16")]; tensor concat_30 = const()[name = string("concat_30"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_14_cast_fp16 = reshape(shape = concat_30, x = tile_7_cast_fp16)[name = string("reshape_14_cast_fp16")]; tensor transpose_15_perm_0 = const()[name = string("transpose_15_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_31 = const()[name = string("concat_31"), val = tensor([-1, 1, 512, 256])]; tensor transpose_15_cast_fp16 = transpose(perm = transpose_15_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_314")]; tensor reshape_15_cast_fp16 = reshape(shape = concat_31, x = transpose_15_cast_fp16)[name = string("reshape_15_cast_fp16")]; tensor V_expanded_7_perm_0 = const()[name = string("V_expanded_7_perm_0"), val = tensor([1, 0, -2, -1])]; bool var_5240_transpose_x_0 = const()[name = string("op_5240_transpose_x_0"), val = bool(false)]; bool var_5240_transpose_y_0 = const()[name = string("op_5240_transpose_y_0"), val = bool(false)]; tensor transpose_143_cast_fp16 = transpose(perm = transpose_143_perm_0, x = reshape_13_cast_fp16)[name = string("transpose_313")]; tensor var_5240_cast_fp16 = matmul(transpose_x = var_5240_transpose_x_0, transpose_y = var_5240_transpose_y_0, x = q_31, y = transpose_143_cast_fp16)[name = string("op_5240_cast_fp16")]; tensor attn_weights_21_cast_fp16 = add(x = var_5240_cast_fp16, y = causal_mask)[name = string("attn_weights_21_cast_fp16")]; int32 var_5250 = const()[name = string("op_5250"), val = int32(-1)]; tensor var_5252_cast_fp16 = softmax(axis = var_5250, x = attn_weights_21_cast_fp16)[name = string("op_5252_cast_fp16")]; bool var_5268_transpose_x_0 = const()[name = string("op_5268_transpose_x_0"), val = bool(false)]; bool var_5268_transpose_y_0 = const()[name = string("op_5268_transpose_y_0"), val = bool(false)]; tensor V_expanded_7_cast_fp16 = transpose(perm = V_expanded_7_perm_0, x = reshape_15_cast_fp16)[name = string("transpose_312")]; tensor var_5268_cast_fp16 = matmul(transpose_x = var_5268_transpose_x_0, transpose_y = var_5268_transpose_y_0, x = var_5252_cast_fp16, y = V_expanded_7_cast_fp16)[name = string("op_5268_cast_fp16")]; tensor var_5278 = const()[name = string("op_5278"), val = tensor([0, 2, 1, 3])]; tensor var_5285 = const()[name = string("op_5285"), val = tensor([1, 1, -1])]; tensor var_5279 = transpose(perm = var_5278, x = var_5268_cast_fp16)[name = string("transpose_311")]; tensor attn_output_21 = reshape(shape = var_5285, x = var_5279)[name = string("attn_output_21")]; tensor var_5290 = const()[name = string("op_5290"), val = tensor([0, 2, 1])]; tensor squeeze_3_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2255125056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2256697984))))[name = string("squeeze_3_palettized")]; string var_5306_pad_type_0 = const()[name = string("op_5306_pad_type_0"), val = string("valid")]; int32 var_5306_groups_0 = const()[name = string("op_5306_groups_0"), val = int32(1)]; tensor var_5306_strides_0 = const()[name = string("op_5306_strides_0"), val = tensor([1])]; tensor var_5306_pad_0 = const()[name = string("op_5306_pad_0"), val = tensor([0, 0])]; tensor var_5306_dilations_0 = const()[name = string("op_5306_dilations_0"), val = tensor([1])]; tensor var_5291 = transpose(perm = var_5290, x = attn_output_21)[name = string("transpose_310")]; tensor var_5306 = conv(dilations = var_5306_dilations_0, groups = var_5306_groups_0, pad = var_5306_pad_0, pad_type = var_5306_pad_type_0, strides = var_5306_strides_0, weight = squeeze_3_palettized, x = var_5291)[name = string("op_5306")]; tensor var_5310 = const()[name = string("op_5310"), val = tensor([0, 2, 1])]; int32 var_5316 = const()[name = string("op_5316"), val = int32(-1)]; fp16 const_138_promoted_to_fp16 = const()[name = string("const_138_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_243 = transpose(perm = var_5310, x = var_5306)[name = string("transpose_309")]; tensor var_5322_cast_fp16 = mul(x = x_243, y = const_138_promoted_to_fp16)[name = string("op_5322_cast_fp16")]; bool input_177_interleave_0 = const()[name = string("input_177_interleave_0"), val = bool(false)]; tensor input_177_cast_fp16 = concat(axis = var_5316, interleave = input_177_interleave_0, values = (x_243, var_5322_cast_fp16))[name = string("input_177_cast_fp16")]; tensor normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor([-1])]; fp16 var_5314_to_fp16 = const()[name = string("op_5314_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_5314_to_fp16, x = input_177_cast_fp16)[name = string("normed_237_cast_fp16")]; tensor var_5327_split_sizes_0 = const()[name = string("op_5327_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_5327_axis_0 = const()[name = string("op_5327_axis_0"), val = int32(-1)]; tensor var_5327_cast_fp16_0, tensor var_5327_cast_fp16_1 = split(axis = var_5327_axis_0, split_sizes = var_5327_split_sizes_0, x = normed_237_cast_fp16)[name = string("op_5327_cast_fp16")]; tensor const_139_to_fp16 = const()[name = string("const_139_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2256699584)))]; tensor var_5330_cast_fp16 = mul(x = var_5327_cast_fp16_0, y = const_139_to_fp16)[name = string("op_5330_cast_fp16")]; tensor x_247_cast_fp16 = add(x = x_229_cast_fp16, y = var_5330_cast_fp16)[name = string("x_247_cast_fp16")]; int32 var_5337 = const()[name = string("op_5337"), val = int32(-1)]; fp16 const_140_promoted_to_fp16 = const()[name = string("const_140_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5343_cast_fp16 = mul(x = x_247_cast_fp16, y = const_140_promoted_to_fp16)[name = string("op_5343_cast_fp16")]; bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; tensor input_179_cast_fp16 = concat(axis = var_5337, interleave = input_179_interleave_0, values = (x_247_cast_fp16, var_5343_cast_fp16))[name = string("input_179_cast_fp16")]; tensor normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor([-1])]; fp16 var_5335_to_fp16 = const()[name = string("op_5335_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_5335_to_fp16, x = input_179_cast_fp16)[name = string("normed_241_cast_fp16")]; tensor var_5348_split_sizes_0 = const()[name = string("op_5348_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_5348_axis_0 = const()[name = string("op_5348_axis_0"), val = int32(-1)]; tensor var_5348_cast_fp16_0, tensor var_5348_cast_fp16_1 = split(axis = var_5348_axis_0, split_sizes = var_5348_split_sizes_0, x = normed_241_cast_fp16)[name = string("op_5348_cast_fp16")]; tensor const_141_to_fp16 = const()[name = string("const_141_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2256702720)))]; tensor var_5351_cast_fp16 = mul(x = var_5348_cast_fp16_0, y = const_141_to_fp16)[name = string("op_5351_cast_fp16")]; tensor var_5364 = const()[name = string("op_5364"), val = tensor([0, 2, 1])]; tensor input_181_axes_0 = const()[name = string("input_181_axes_0"), val = tensor([2])]; tensor var_5365 = transpose(perm = var_5364, x = var_5351_cast_fp16)[name = string("transpose_308")]; tensor input_181 = expand_dims(axes = input_181_axes_0, x = var_5365)[name = string("input_181")]; string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")]; tensor gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor([1, 1])]; tensor gate_13_pad_0 = const()[name = string("gate_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor([1, 1])]; int32 gate_13_groups_0 = const()[name = string("gate_13_groups_0"), val = int32(1)]; tensor gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_181)[name = string("gate_13")]; string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")]; tensor up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor([1, 1])]; tensor up_7_pad_0 = const()[name = string("up_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor([1, 1])]; int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)]; tensor up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_181)[name = string("up_7")]; string gate_15_mode_0 = const()[name = string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")]; tensor input_183 = mul(x = gate_15, y = up_7)[name = string("input_183")]; string mlp_out_7_pad_type_0 = const()[name = string("mlp_out_7_pad_type_0"), val = string("valid")]; tensor mlp_out_7_strides_0 = const()[name = string("mlp_out_7_strides_0"), val = tensor([1, 1])]; tensor mlp_out_7_pad_0 = const()[name = string("mlp_out_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_7_dilations_0 = const()[name = string("mlp_out_7_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_7_groups_0 = const()[name = string("mlp_out_7_groups_0"), val = int32(1)]; tensor mlp_out_7 = conv(dilations = mlp_out_7_dilations_0, groups = mlp_out_7_groups_0, pad = mlp_out_7_pad_0, pad_type = mlp_out_7_pad_type_0, strides = mlp_out_7_strides_0, weight = layers_3_mlp_down_proj_weight_palettized, x = input_183)[name = string("mlp_out_7")]; tensor var_5405_axes_0 = const()[name = string("op_5405_axes_0"), val = tensor([2])]; tensor var_5405 = squeeze(axes = var_5405_axes_0, x = mlp_out_7)[name = string("op_5405")]; tensor var_5409 = const()[name = string("op_5409"), val = tensor([0, 2, 1])]; int32 var_5415 = const()[name = string("op_5415"), val = int32(-1)]; fp16 const_142_promoted_to_fp16 = const()[name = string("const_142_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_251 = transpose(perm = var_5409, x = var_5405)[name = string("transpose_307")]; tensor var_5421_cast_fp16 = mul(x = x_251, y = const_142_promoted_to_fp16)[name = string("op_5421_cast_fp16")]; bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; tensor input_185_cast_fp16 = concat(axis = var_5415, interleave = input_185_interleave_0, values = (x_251, var_5421_cast_fp16))[name = string("input_185_cast_fp16")]; tensor normed_245_axes_0 = const()[name = string("normed_245_axes_0"), val = tensor([-1])]; fp16 var_5413_to_fp16 = const()[name = string("op_5413_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_5413_to_fp16, x = input_185_cast_fp16)[name = string("normed_245_cast_fp16")]; tensor var_5426_split_sizes_0 = const()[name = string("op_5426_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_5426_axis_0 = const()[name = string("op_5426_axis_0"), val = int32(-1)]; tensor var_5426_cast_fp16_0, tensor var_5426_cast_fp16_1 = split(axis = var_5426_axis_0, split_sizes = var_5426_split_sizes_0, x = normed_245_cast_fp16)[name = string("op_5426_cast_fp16")]; tensor const_143_to_fp16 = const()[name = string("const_143_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2256705856)))]; tensor var_5429_cast_fp16 = mul(x = var_5426_cast_fp16_0, y = const_143_to_fp16)[name = string("op_5429_cast_fp16")]; tensor hidden_states_49_cast_fp16 = add(x = x_247_cast_fp16, y = var_5429_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; tensor per_layer_slice_7_begin_0 = const()[name = string("per_layer_slice_7_begin_0"), val = tensor([0, 0, 768])]; tensor per_layer_slice_7_end_0 = const()[name = string("per_layer_slice_7_end_0"), val = tensor([1, 1, 1024])]; tensor per_layer_slice_7_end_mask_0 = const()[name = string("per_layer_slice_7_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_7 = slice_by_index(begin = per_layer_slice_7_begin_0, end = per_layer_slice_7_end_0, end_mask = per_layer_slice_7_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_7")]; tensor gated_13 = linear(bias = linear_1_bias_0, weight = layers_3_per_layer_input_gate_weight_palettized, x = hidden_states_49_cast_fp16)[name = string("linear_7")]; string gated_15_mode_0 = const()[name = string("gated_15_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_15 = gelu(mode = gated_15_mode_0, x = gated_13)[name = string("gated_15")]; tensor input_189 = mul(x = gated_15, y = per_layer_slice_7)[name = string("input_189")]; tensor x_255 = linear(bias = linear_2_bias_0, weight = layers_3_per_layer_projection_weight_palettized, x = input_189)[name = string("linear_8")]; int32 var_5466 = const()[name = string("op_5466"), val = int32(-1)]; fp16 const_144_promoted_to_fp16 = const()[name = string("const_144_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5472_cast_fp16 = mul(x = x_255, y = const_144_promoted_to_fp16)[name = string("op_5472_cast_fp16")]; bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; tensor input_191_cast_fp16 = concat(axis = var_5466, interleave = input_191_interleave_0, values = (x_255, var_5472_cast_fp16))[name = string("input_191_cast_fp16")]; tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; fp16 var_5464_to_fp16 = const()[name = string("op_5464_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_5464_to_fp16, x = input_191_cast_fp16)[name = string("normed_249_cast_fp16")]; tensor var_5477_split_sizes_0 = const()[name = string("op_5477_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_5477_axis_0 = const()[name = string("op_5477_axis_0"), val = int32(-1)]; tensor var_5477_cast_fp16_0, tensor var_5477_cast_fp16_1 = split(axis = var_5477_axis_0, split_sizes = var_5477_split_sizes_0, x = normed_249_cast_fp16)[name = string("op_5477_cast_fp16")]; tensor const_145_to_fp16 = const()[name = string("const_145_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2256708992)))]; tensor var_5480_cast_fp16 = mul(x = var_5477_cast_fp16_0, y = const_145_to_fp16)[name = string("op_5480_cast_fp16")]; tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = var_5480_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor layers_3_layer_scalar_to_fp16 = const()[name = string("layers_3_layer_scalar_to_fp16"), val = tensor([0x1.26p-2])]; tensor x_259_cast_fp16 = mul(x = hidden_states_53_cast_fp16, y = layers_3_layer_scalar_to_fp16)[name = string("x_259_cast_fp16")]; int32 var_5488 = const()[name = string("op_5488"), val = int32(-1)]; fp16 const_146_promoted_to_fp16 = const()[name = string("const_146_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5494_cast_fp16 = mul(x = x_259_cast_fp16, y = const_146_promoted_to_fp16)[name = string("op_5494_cast_fp16")]; bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; tensor input_193_cast_fp16 = concat(axis = var_5488, interleave = input_193_interleave_0, values = (x_259_cast_fp16, var_5494_cast_fp16))[name = string("input_193_cast_fp16")]; tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; fp16 var_5486_to_fp16 = const()[name = string("op_5486_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_5486_to_fp16, x = input_193_cast_fp16)[name = string("normed_253_cast_fp16")]; tensor var_5499_split_sizes_0 = const()[name = string("op_5499_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_5499_axis_0 = const()[name = string("op_5499_axis_0"), val = int32(-1)]; tensor var_5499_cast_fp16_0, tensor var_5499_cast_fp16_1 = split(axis = var_5499_axis_0, split_sizes = var_5499_split_sizes_0, x = normed_253_cast_fp16)[name = string("op_5499_cast_fp16")]; tensor const_147_to_fp16 = const()[name = string("const_147_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2256712128)))]; tensor var_5502_cast_fp16 = mul(x = var_5499_cast_fp16_0, y = const_147_to_fp16)[name = string("op_5502_cast_fp16")]; tensor var_5510 = const()[name = string("op_5510"), val = tensor([0, 2, 1])]; tensor var_5513_axes_0 = const()[name = string("op_5513_axes_0"), val = tensor([2])]; tensor var_5511_cast_fp16 = transpose(perm = var_5510, x = var_5502_cast_fp16)[name = string("transpose_306")]; tensor var_5513_cast_fp16 = expand_dims(axes = var_5513_axes_0, x = var_5511_cast_fp16)[name = string("op_5513_cast_fp16")]; string var_5529_pad_type_0 = const()[name = string("op_5529_pad_type_0"), val = string("valid")]; tensor var_5529_strides_0 = const()[name = string("op_5529_strides_0"), val = tensor([1, 1])]; tensor var_5529_pad_0 = const()[name = string("op_5529_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5529_dilations_0 = const()[name = string("op_5529_dilations_0"), val = tensor([1, 1])]; int32 var_5529_groups_0 = const()[name = string("op_5529_groups_0"), val = int32(1)]; tensor var_5529 = conv(dilations = var_5529_dilations_0, groups = var_5529_groups_0, pad = var_5529_pad_0, pad_type = var_5529_pad_type_0, strides = var_5529_strides_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = var_5513_cast_fp16)[name = string("op_5529")]; tensor var_5534 = const()[name = string("op_5534"), val = tensor([1, 8, 512, 1])]; tensor var_5535 = reshape(shape = var_5534, x = var_5529)[name = string("op_5535")]; tensor var_5540 = const()[name = string("op_5540"), val = tensor([0, 1, 3, 2])]; tensor var_5550 = const()[name = string("op_5550"), val = tensor([1, 8, 512])]; tensor var_5541 = transpose(perm = var_5540, x = var_5535)[name = string("transpose_305")]; tensor x_263 = reshape(shape = var_5550, x = var_5541)[name = string("x_263")]; int32 var_5556 = const()[name = string("op_5556"), val = int32(-1)]; fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5562_cast_fp16 = mul(x = x_263, y = const_148_promoted_to_fp16)[name = string("op_5562_cast_fp16")]; bool input_197_interleave_0 = const()[name = string("input_197_interleave_0"), val = bool(false)]; tensor input_197_cast_fp16 = concat(axis = var_5556, interleave = input_197_interleave_0, values = (x_263, var_5562_cast_fp16))[name = string("input_197_cast_fp16")]; tensor normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor([-1])]; fp16 var_5554_to_fp16 = const()[name = string("op_5554_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = var_5554_to_fp16, x = input_197_cast_fp16)[name = string("normed_257_cast_fp16")]; tensor var_5567_split_sizes_0 = const()[name = string("op_5567_split_sizes_0"), val = tensor([512, 512])]; int32 var_5567_axis_0 = const()[name = string("op_5567_axis_0"), val = int32(-1)]; tensor var_5567_cast_fp16_0, tensor var_5567_cast_fp16_1 = split(axis = var_5567_axis_0, split_sizes = var_5567_split_sizes_0, x = normed_257_cast_fp16)[name = string("op_5567_cast_fp16")]; tensor const_149_to_fp16 = const()[name = string("const_149_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2256715264)))]; tensor var_5570_cast_fp16 = mul(x = var_5567_cast_fp16_0, y = const_149_to_fp16)[name = string("op_5570_cast_fp16")]; tensor var_5576 = const()[name = string("op_5576"), val = tensor([1, 8, 1, 512])]; tensor q_35 = reshape(shape = var_5576, x = var_5570_cast_fp16)[name = string("q_35")]; tensor var_5578 = mul(x = q_35, y = cos)[name = string("op_5578")]; tensor var_5579_split_sizes_0 = const()[name = string("op_5579_split_sizes_0"), val = tensor([256, 256])]; int32 var_5579_axis_0 = const()[name = string("op_5579_axis_0"), val = int32(-1)]; tensor var_5579_0, tensor var_5579_1 = split(axis = var_5579_axis_0, split_sizes = var_5579_split_sizes_0, x = q_35)[name = string("op_5579")]; fp16 const_150_promoted = const()[name = string("const_150_promoted"), val = fp16(-0x1p+0)]; tensor var_5581 = mul(x = var_5579_1, y = const_150_promoted)[name = string("op_5581")]; int32 var_5583 = const()[name = string("op_5583"), val = int32(-1)]; bool var_5584_interleave_0 = const()[name = string("op_5584_interleave_0"), val = bool(false)]; tensor var_5584 = concat(axis = var_5583, interleave = var_5584_interleave_0, values = (var_5581, var_5579_0))[name = string("op_5584")]; tensor var_5585 = mul(x = var_5584, y = sin)[name = string("op_5585")]; tensor q_39 = add(x = var_5578, y = var_5585)[name = string("q_39")]; string var_5598_pad_type_0 = const()[name = string("op_5598_pad_type_0"), val = string("valid")]; tensor var_5598_strides_0 = const()[name = string("op_5598_strides_0"), val = tensor([1, 1])]; tensor var_5598_pad_0 = const()[name = string("op_5598_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5598_dilations_0 = const()[name = string("op_5598_dilations_0"), val = tensor([1, 1])]; int32 var_5598_groups_0 = const()[name = string("op_5598_groups_0"), val = int32(1)]; tensor var_5598 = conv(dilations = var_5598_dilations_0, groups = var_5598_groups_0, pad = var_5598_pad_0, pad_type = var_5598_pad_type_0, strides = var_5598_strides_0, weight = layers_4_self_attn_k_proj_weight_palettized, x = var_5513_cast_fp16)[name = string("op_5598")]; tensor var_5603 = const()[name = string("op_5603"), val = tensor([1, 1, 512, 1])]; tensor var_5604 = reshape(shape = var_5603, x = var_5598)[name = string("op_5604")]; tensor var_5609 = const()[name = string("op_5609"), val = tensor([0, 1, 3, 2])]; string var_5626_pad_type_0 = const()[name = string("op_5626_pad_type_0"), val = string("valid")]; tensor var_5626_strides_0 = const()[name = string("op_5626_strides_0"), val = tensor([1, 1])]; tensor var_5626_pad_0 = const()[name = string("op_5626_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5626_dilations_0 = const()[name = string("op_5626_dilations_0"), val = tensor([1, 1])]; int32 var_5626_groups_0 = const()[name = string("op_5626_groups_0"), val = int32(1)]; tensor var_5626 = conv(dilations = var_5626_dilations_0, groups = var_5626_groups_0, pad = var_5626_pad_0, pad_type = var_5626_pad_type_0, strides = var_5626_strides_0, weight = layers_4_self_attn_v_proj_weight_palettized, x = var_5513_cast_fp16)[name = string("op_5626")]; tensor var_5631 = const()[name = string("op_5631"), val = tensor([1, 1, 512, 1])]; tensor var_5632 = reshape(shape = var_5631, x = var_5626)[name = string("op_5632")]; tensor var_5637 = const()[name = string("op_5637"), val = tensor([0, 1, 3, 2])]; tensor var_5647 = const()[name = string("op_5647"), val = tensor([1, 1, 512])]; tensor var_5610 = transpose(perm = var_5609, x = var_5604)[name = string("transpose_304")]; tensor x_267 = reshape(shape = var_5647, x = var_5610)[name = string("x_267")]; int32 var_5653 = const()[name = string("op_5653"), val = int32(-1)]; fp16 const_151_promoted_to_fp16 = const()[name = string("const_151_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5659_cast_fp16 = mul(x = x_267, y = const_151_promoted_to_fp16)[name = string("op_5659_cast_fp16")]; bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; tensor input_199_cast_fp16 = concat(axis = var_5653, interleave = input_199_interleave_0, values = (x_267, var_5659_cast_fp16))[name = string("input_199_cast_fp16")]; tensor normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor([-1])]; fp16 var_5651_to_fp16 = const()[name = string("op_5651_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_5651_to_fp16, x = input_199_cast_fp16)[name = string("normed_261_cast_fp16")]; tensor var_5664_split_sizes_0 = const()[name = string("op_5664_split_sizes_0"), val = tensor([512, 512])]; int32 var_5664_axis_0 = const()[name = string("op_5664_axis_0"), val = int32(-1)]; tensor var_5664_cast_fp16_0, tensor var_5664_cast_fp16_1 = split(axis = var_5664_axis_0, split_sizes = var_5664_split_sizes_0, x = normed_261_cast_fp16)[name = string("op_5664_cast_fp16")]; tensor const_152_to_fp16 = const()[name = string("const_152_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2256716352)))]; tensor var_5667_cast_fp16 = mul(x = var_5664_cast_fp16_0, y = const_152_to_fp16)[name = string("op_5667_cast_fp16")]; tensor var_5673 = const()[name = string("op_5673"), val = tensor([1, 1, 1, 512])]; tensor q_37 = reshape(shape = var_5673, x = var_5667_cast_fp16)[name = string("q_37")]; fp16 var_5680_promoted_to_fp16 = const()[name = string("op_5680_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5638 = transpose(perm = var_5637, x = var_5632)[name = string("transpose_303")]; tensor var_5681_cast_fp16 = pow(x = var_5638, y = var_5680_promoted_to_fp16)[name = string("op_5681_cast_fp16")]; tensor var_5686_axes_0 = const()[name = string("op_5686_axes_0"), val = tensor([-1])]; bool var_5686_keep_dims_0 = const()[name = string("op_5686_keep_dims_0"), val = bool(true)]; tensor var_5686_cast_fp16 = reduce_mean(axes = var_5686_axes_0, keep_dims = var_5686_keep_dims_0, x = var_5681_cast_fp16)[name = string("op_5686_cast_fp16")]; fp16 var_5688_to_fp16 = const()[name = string("op_5688_to_fp16"), val = fp16(0x1.1p-20)]; tensor mean_sq_9_cast_fp16 = add(x = var_5686_cast_fp16, y = var_5688_to_fp16)[name = string("mean_sq_9_cast_fp16")]; fp16 var_5695_to_fp16 = const()[name = string("op_5695_to_fp16"), val = fp16(-0x1p-1)]; tensor var_5696_cast_fp16 = pow(x = mean_sq_9_cast_fp16, y = var_5695_to_fp16)[name = string("op_5696_cast_fp16")]; tensor var_5697_cast_fp16 = mul(x = var_5638, y = var_5696_cast_fp16)[name = string("op_5697_cast_fp16")]; tensor var_5703 = mul(x = q_37, y = cos)[name = string("op_5703")]; tensor var_5704_split_sizes_0 = const()[name = string("op_5704_split_sizes_0"), val = tensor([256, 256])]; int32 var_5704_axis_0 = const()[name = string("op_5704_axis_0"), val = int32(-1)]; tensor var_5704_0, tensor var_5704_1 = split(axis = var_5704_axis_0, split_sizes = var_5704_split_sizes_0, x = q_37)[name = string("op_5704")]; fp16 const_153_promoted = const()[name = string("const_153_promoted"), val = fp16(-0x1p+0)]; tensor var_5706 = mul(x = var_5704_1, y = const_153_promoted)[name = string("op_5706")]; int32 var_5708 = const()[name = string("op_5708"), val = int32(-1)]; bool var_5709_interleave_0 = const()[name = string("op_5709_interleave_0"), val = bool(false)]; tensor var_5709 = concat(axis = var_5708, interleave = var_5709_interleave_0, values = (var_5706, var_5704_0))[name = string("op_5709")]; tensor var_5710 = mul(x = var_5709, y = sin)[name = string("op_5710")]; tensor k_11 = add(x = var_5703, y = var_5710)[name = string("k_11")]; tensor var_5715_begin_0 = const()[name = string("op_5715_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_5715_end_0 = const()[name = string("op_5715_end_0"), val = tensor([5, 1, 512, 512])]; tensor var_5715_end_mask_0 = const()[name = string("op_5715_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5715_squeeze_mask_0 = const()[name = string("op_5715_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_5715_cast_fp16 = slice_by_index(begin = var_5715_begin_0, end = var_5715_end_0, end_mask = var_5715_end_mask_0, squeeze_mask = var_5715_squeeze_mask_0, x = coreml_update_state_37)[name = string("op_5715_cast_fp16")]; tensor K_cache_9_axes_0 = const()[name = string("K_cache_9_axes_0"), val = tensor([0])]; tensor K_cache_9_cast_fp16 = expand_dims(axes = K_cache_9_axes_0, x = var_5715_cast_fp16)[name = string("K_cache_9_cast_fp16")]; tensor var_5720_begin_0 = const()[name = string("op_5720_begin_0"), val = tensor([39, 0, 0, 0])]; tensor var_5720_end_0 = const()[name = string("op_5720_end_0"), val = tensor([40, 1, 512, 512])]; tensor var_5720_end_mask_0 = const()[name = string("op_5720_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_5720_squeeze_mask_0 = const()[name = string("op_5720_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_5720_cast_fp16 = slice_by_index(begin = var_5720_begin_0, end = var_5720_end_0, end_mask = var_5720_end_mask_0, squeeze_mask = var_5720_squeeze_mask_0, x = coreml_update_state_37)[name = string("op_5720_cast_fp16")]; tensor V_cache_9_axes_0 = const()[name = string("V_cache_9_axes_0"), val = tensor([0])]; tensor V_cache_9_cast_fp16 = expand_dims(axes = V_cache_9_axes_0, x = var_5720_cast_fp16)[name = string("V_cache_9_cast_fp16")]; tensor var_5726_cast_fp16 = mul(x = K_cache_9_cast_fp16, y = var_3515_cast_fp16)[name = string("op_5726_cast_fp16")]; tensor var_5727_reps_0 = const()[name = string("op_5727_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_5727 = tile(reps = var_5727_reps_0, x = k_11)[name = string("op_5727")]; tensor var_5728_cast_fp16 = mul(x = var_5727, y = update_mask)[name = string("op_5728_cast_fp16")]; tensor K_new_9_cast_fp16 = add(x = var_5726_cast_fp16, y = var_5728_cast_fp16)[name = string("K_new_9_cast_fp16")]; tensor var_5734_cast_fp16 = mul(x = V_cache_9_cast_fp16, y = var_3515_cast_fp16)[name = string("op_5734_cast_fp16")]; tensor var_5735_reps_0 = const()[name = string("op_5735_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_5735 = tile(reps = var_5735_reps_0, x = var_5697_cast_fp16)[name = string("op_5735")]; tensor var_5736_cast_fp16 = mul(x = var_5735, y = update_mask)[name = string("op_5736_cast_fp16")]; tensor V_new_9_cast_fp16 = add(x = var_5734_cast_fp16, y = var_5736_cast_fp16)[name = string("V_new_9_cast_fp16")]; tensor var_5740_axes_0 = const()[name = string("op_5740_axes_0"), val = tensor([0])]; tensor var_5740_cast_fp16 = squeeze(axes = var_5740_axes_0, x = K_new_9_cast_fp16)[name = string("op_5740_cast_fp16")]; tensor concat_32 = const()[name = string("concat_32"), val = tensor([4, 0, 0, 0])]; tensor concat_33 = const()[name = string("concat_33"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_32, begin_mask = kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_33, end_mask = kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_9_stride_0, update = var_5740_cast_fp16, x = coreml_update_state_37)[name = string("kv_cache_0_internal_tensor_assign_9_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_9_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_38_write_state")]; tensor coreml_update_state_38 = read_state(input = kv_cache_0)[name = string("coreml_update_state_38")]; tensor var_5747_axes_0 = const()[name = string("op_5747_axes_0"), val = tensor([0])]; tensor var_5747_cast_fp16 = squeeze(axes = var_5747_axes_0, x = V_new_9_cast_fp16)[name = string("op_5747_cast_fp16")]; tensor concat_34 = const()[name = string("concat_34"), val = tensor([39, 0, 0, 0])]; tensor concat_35 = const()[name = string("concat_35"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_34, begin_mask = kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_35, end_mask = kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_10_stride_0, update = var_5747_cast_fp16, x = coreml_update_state_38)[name = string("kv_cache_0_internal_tensor_assign_10_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_10_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_39_write_state")]; tensor coreml_update_state_39 = read_state(input = kv_cache_0)[name = string("coreml_update_state_39")]; tensor transpose_16_perm_0 = const()[name = string("transpose_16_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_8_reps_0 = const()[name = string("tile_8_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_16_cast_fp16 = transpose(perm = transpose_16_perm_0, x = K_new_9_cast_fp16)[name = string("transpose_302")]; tensor tile_8_cast_fp16 = tile(reps = tile_8_reps_0, x = transpose_16_cast_fp16)[name = string("tile_8_cast_fp16")]; tensor concat_36 = const()[name = string("concat_36"), val = tensor([8, 1, 1, 512, 512])]; tensor reshape_16_cast_fp16 = reshape(shape = concat_36, x = tile_8_cast_fp16)[name = string("reshape_16_cast_fp16")]; tensor transpose_17_perm_0 = const()[name = string("transpose_17_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_37 = const()[name = string("concat_37"), val = tensor([-1, 1, 512, 512])]; tensor transpose_17_cast_fp16 = transpose(perm = transpose_17_perm_0, x = reshape_16_cast_fp16)[name = string("transpose_301")]; tensor reshape_17_cast_fp16 = reshape(shape = concat_37, x = transpose_17_cast_fp16)[name = string("reshape_17_cast_fp16")]; tensor transpose_144_perm_0 = const()[name = string("transpose_144_perm_0"), val = tensor([1, 0, -1, -2])]; tensor transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_9_reps_0 = const()[name = string("tile_9_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_18_cast_fp16 = transpose(perm = transpose_18_perm_0, x = V_new_9_cast_fp16)[name = string("transpose_300")]; tensor tile_9_cast_fp16 = tile(reps = tile_9_reps_0, x = transpose_18_cast_fp16)[name = string("tile_9_cast_fp16")]; tensor concat_38 = const()[name = string("concat_38"), val = tensor([8, 1, 1, 512, 512])]; tensor reshape_18_cast_fp16 = reshape(shape = concat_38, x = tile_9_cast_fp16)[name = string("reshape_18_cast_fp16")]; tensor transpose_19_perm_0 = const()[name = string("transpose_19_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_39 = const()[name = string("concat_39"), val = tensor([-1, 1, 512, 512])]; tensor transpose_19_cast_fp16 = transpose(perm = transpose_19_perm_0, x = reshape_18_cast_fp16)[name = string("transpose_299")]; tensor reshape_19_cast_fp16 = reshape(shape = concat_39, x = transpose_19_cast_fp16)[name = string("reshape_19_cast_fp16")]; tensor V_expanded_9_perm_0 = const()[name = string("V_expanded_9_perm_0"), val = tensor([1, 0, -2, -1])]; bool var_5784_transpose_x_0 = const()[name = string("op_5784_transpose_x_0"), val = bool(false)]; bool var_5784_transpose_y_0 = const()[name = string("op_5784_transpose_y_0"), val = bool(false)]; tensor transpose_144_cast_fp16 = transpose(perm = transpose_144_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_298")]; tensor var_5784_cast_fp16 = matmul(transpose_x = var_5784_transpose_x_0, transpose_y = var_5784_transpose_y_0, x = q_39, y = transpose_144_cast_fp16)[name = string("op_5784_cast_fp16")]; tensor attn_weights_27_cast_fp16 = add(x = var_5784_cast_fp16, y = causal_mask)[name = string("attn_weights_27_cast_fp16")]; int32 var_5794 = const()[name = string("op_5794"), val = int32(-1)]; tensor var_5796_cast_fp16 = softmax(axis = var_5794, x = attn_weights_27_cast_fp16)[name = string("op_5796_cast_fp16")]; bool var_5812_transpose_x_0 = const()[name = string("op_5812_transpose_x_0"), val = bool(false)]; bool var_5812_transpose_y_0 = const()[name = string("op_5812_transpose_y_0"), val = bool(false)]; tensor V_expanded_9_cast_fp16 = transpose(perm = V_expanded_9_perm_0, x = reshape_19_cast_fp16)[name = string("transpose_297")]; tensor var_5812_cast_fp16 = matmul(transpose_x = var_5812_transpose_x_0, transpose_y = var_5812_transpose_y_0, x = var_5796_cast_fp16, y = V_expanded_9_cast_fp16)[name = string("op_5812_cast_fp16")]; tensor var_5822 = const()[name = string("op_5822"), val = tensor([0, 2, 1, 3])]; tensor var_5829 = const()[name = string("op_5829"), val = tensor([1, 1, -1])]; tensor var_5823 = transpose(perm = var_5822, x = var_5812_cast_fp16)[name = string("transpose_296")]; tensor attn_output_27 = reshape(shape = var_5829, x = var_5823)[name = string("attn_output_27")]; tensor var_5834 = const()[name = string("op_5834"), val = tensor([0, 2, 1])]; tensor squeeze_4_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2256717440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2259863232))))[name = string("squeeze_4_palettized")]; string var_5850_pad_type_0 = const()[name = string("op_5850_pad_type_0"), val = string("valid")]; int32 var_5850_groups_0 = const()[name = string("op_5850_groups_0"), val = int32(1)]; tensor var_5850_strides_0 = const()[name = string("op_5850_strides_0"), val = tensor([1])]; tensor var_5850_pad_0 = const()[name = string("op_5850_pad_0"), val = tensor([0, 0])]; tensor var_5850_dilations_0 = const()[name = string("op_5850_dilations_0"), val = tensor([1])]; tensor var_5835 = transpose(perm = var_5834, x = attn_output_27)[name = string("transpose_295")]; tensor var_5850 = conv(dilations = var_5850_dilations_0, groups = var_5850_groups_0, pad = var_5850_pad_0, pad_type = var_5850_pad_type_0, strides = var_5850_strides_0, weight = squeeze_4_palettized, x = var_5835)[name = string("op_5850")]; tensor var_5854 = const()[name = string("op_5854"), val = tensor([0, 2, 1])]; int32 var_5860 = const()[name = string("op_5860"), val = int32(-1)]; fp16 const_154_promoted_to_fp16 = const()[name = string("const_154_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_273 = transpose(perm = var_5854, x = var_5850)[name = string("transpose_294")]; tensor var_5866_cast_fp16 = mul(x = x_273, y = const_154_promoted_to_fp16)[name = string("op_5866_cast_fp16")]; bool input_203_interleave_0 = const()[name = string("input_203_interleave_0"), val = bool(false)]; tensor input_203_cast_fp16 = concat(axis = var_5860, interleave = input_203_interleave_0, values = (x_273, var_5866_cast_fp16))[name = string("input_203_cast_fp16")]; tensor normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor([-1])]; fp16 var_5858_to_fp16 = const()[name = string("op_5858_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_5858_to_fp16, x = input_203_cast_fp16)[name = string("normed_265_cast_fp16")]; tensor var_5871_split_sizes_0 = const()[name = string("op_5871_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_5871_axis_0 = const()[name = string("op_5871_axis_0"), val = int32(-1)]; tensor var_5871_cast_fp16_0, tensor var_5871_cast_fp16_1 = split(axis = var_5871_axis_0, split_sizes = var_5871_split_sizes_0, x = normed_265_cast_fp16)[name = string("op_5871_cast_fp16")]; tensor const_155_to_fp16 = const()[name = string("const_155_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2259864832)))]; tensor var_5874_cast_fp16 = mul(x = var_5871_cast_fp16_0, y = const_155_to_fp16)[name = string("op_5874_cast_fp16")]; tensor x_277_cast_fp16 = add(x = x_259_cast_fp16, y = var_5874_cast_fp16)[name = string("x_277_cast_fp16")]; int32 var_5881 = const()[name = string("op_5881"), val = int32(-1)]; fp16 const_156_promoted_to_fp16 = const()[name = string("const_156_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5887_cast_fp16 = mul(x = x_277_cast_fp16, y = const_156_promoted_to_fp16)[name = string("op_5887_cast_fp16")]; bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; tensor input_205_cast_fp16 = concat(axis = var_5881, interleave = input_205_interleave_0, values = (x_277_cast_fp16, var_5887_cast_fp16))[name = string("input_205_cast_fp16")]; tensor normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor([-1])]; fp16 var_5879_to_fp16 = const()[name = string("op_5879_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_5879_to_fp16, x = input_205_cast_fp16)[name = string("normed_269_cast_fp16")]; tensor var_5892_split_sizes_0 = const()[name = string("op_5892_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_5892_axis_0 = const()[name = string("op_5892_axis_0"), val = int32(-1)]; tensor var_5892_cast_fp16_0, tensor var_5892_cast_fp16_1 = split(axis = var_5892_axis_0, split_sizes = var_5892_split_sizes_0, x = normed_269_cast_fp16)[name = string("op_5892_cast_fp16")]; tensor const_157_to_fp16 = const()[name = string("const_157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2259867968)))]; tensor var_5895_cast_fp16 = mul(x = var_5892_cast_fp16_0, y = const_157_to_fp16)[name = string("op_5895_cast_fp16")]; tensor var_5908 = const()[name = string("op_5908"), val = tensor([0, 2, 1])]; tensor input_207_axes_0 = const()[name = string("input_207_axes_0"), val = tensor([2])]; tensor var_5909 = transpose(perm = var_5908, x = var_5895_cast_fp16)[name = string("transpose_293")]; tensor input_207 = expand_dims(axes = input_207_axes_0, x = var_5909)[name = string("input_207")]; string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")]; tensor gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor([1, 1])]; tensor gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor([1, 1])]; int32 gate_17_groups_0 = const()[name = string("gate_17_groups_0"), val = int32(1)]; tensor gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_207)[name = string("gate_17")]; string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")]; tensor up_9_strides_0 = const()[name = string("up_9_strides_0"), val = tensor([1, 1])]; tensor up_9_pad_0 = const()[name = string("up_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor([1, 1])]; int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)]; tensor up_9 = conv(dilations = up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_207)[name = string("up_9")]; string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = string("gate_19")]; tensor input_209 = mul(x = gate_19, y = up_9)[name = string("input_209")]; string mlp_out_9_pad_type_0 = const()[name = string("mlp_out_9_pad_type_0"), val = string("valid")]; tensor mlp_out_9_strides_0 = const()[name = string("mlp_out_9_strides_0"), val = tensor([1, 1])]; tensor mlp_out_9_pad_0 = const()[name = string("mlp_out_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_9_dilations_0 = const()[name = string("mlp_out_9_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_9_groups_0 = const()[name = string("mlp_out_9_groups_0"), val = int32(1)]; tensor mlp_out_9 = conv(dilations = mlp_out_9_dilations_0, groups = mlp_out_9_groups_0, pad = mlp_out_9_pad_0, pad_type = mlp_out_9_pad_type_0, strides = mlp_out_9_strides_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_209)[name = string("mlp_out_9")]; tensor var_5949_axes_0 = const()[name = string("op_5949_axes_0"), val = tensor([2])]; tensor var_5949 = squeeze(axes = var_5949_axes_0, x = mlp_out_9)[name = string("op_5949")]; tensor var_5953 = const()[name = string("op_5953"), val = tensor([0, 2, 1])]; int32 var_5959 = const()[name = string("op_5959"), val = int32(-1)]; fp16 const_158_promoted_to_fp16 = const()[name = string("const_158_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_281 = transpose(perm = var_5953, x = var_5949)[name = string("transpose_292")]; tensor var_5965_cast_fp16 = mul(x = x_281, y = const_158_promoted_to_fp16)[name = string("op_5965_cast_fp16")]; bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; tensor input_211_cast_fp16 = concat(axis = var_5959, interleave = input_211_interleave_0, values = (x_281, var_5965_cast_fp16))[name = string("input_211_cast_fp16")]; tensor normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor([-1])]; fp16 var_5957_to_fp16 = const()[name = string("op_5957_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_5957_to_fp16, x = input_211_cast_fp16)[name = string("normed_273_cast_fp16")]; tensor var_5970_split_sizes_0 = const()[name = string("op_5970_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_5970_axis_0 = const()[name = string("op_5970_axis_0"), val = int32(-1)]; tensor var_5970_cast_fp16_0, tensor var_5970_cast_fp16_1 = split(axis = var_5970_axis_0, split_sizes = var_5970_split_sizes_0, x = normed_273_cast_fp16)[name = string("op_5970_cast_fp16")]; tensor const_159_to_fp16 = const()[name = string("const_159_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2259871104)))]; tensor var_5973_cast_fp16 = mul(x = var_5970_cast_fp16_0, y = const_159_to_fp16)[name = string("op_5973_cast_fp16")]; tensor hidden_states_61_cast_fp16 = add(x = x_277_cast_fp16, y = var_5973_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; tensor per_layer_slice_9_begin_0 = const()[name = string("per_layer_slice_9_begin_0"), val = tensor([0, 0, 1024])]; tensor per_layer_slice_9_end_0 = const()[name = string("per_layer_slice_9_end_0"), val = tensor([1, 1, 1280])]; tensor per_layer_slice_9_end_mask_0 = const()[name = string("per_layer_slice_9_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_9 = slice_by_index(begin = per_layer_slice_9_begin_0, end = per_layer_slice_9_end_0, end_mask = per_layer_slice_9_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_9")]; tensor gated_17 = linear(bias = linear_1_bias_0, weight = layers_4_per_layer_input_gate_weight_palettized, x = hidden_states_61_cast_fp16)[name = string("linear_9")]; string gated_19_mode_0 = const()[name = string("gated_19_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_19 = gelu(mode = gated_19_mode_0, x = gated_17)[name = string("gated_19")]; tensor input_215 = mul(x = gated_19, y = per_layer_slice_9)[name = string("input_215")]; tensor x_285 = linear(bias = linear_2_bias_0, weight = layers_4_per_layer_projection_weight_palettized, x = input_215)[name = string("linear_10")]; int32 var_6010 = const()[name = string("op_6010"), val = int32(-1)]; fp16 const_160_promoted_to_fp16 = const()[name = string("const_160_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6016_cast_fp16 = mul(x = x_285, y = const_160_promoted_to_fp16)[name = string("op_6016_cast_fp16")]; bool input_217_interleave_0 = const()[name = string("input_217_interleave_0"), val = bool(false)]; tensor input_217_cast_fp16 = concat(axis = var_6010, interleave = input_217_interleave_0, values = (x_285, var_6016_cast_fp16))[name = string("input_217_cast_fp16")]; tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; fp16 var_6008_to_fp16 = const()[name = string("op_6008_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_6008_to_fp16, x = input_217_cast_fp16)[name = string("normed_277_cast_fp16")]; tensor var_6021_split_sizes_0 = const()[name = string("op_6021_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_6021_axis_0 = const()[name = string("op_6021_axis_0"), val = int32(-1)]; tensor var_6021_cast_fp16_0, tensor var_6021_cast_fp16_1 = split(axis = var_6021_axis_0, split_sizes = var_6021_split_sizes_0, x = normed_277_cast_fp16)[name = string("op_6021_cast_fp16")]; tensor const_161_to_fp16 = const()[name = string("const_161_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2259874240)))]; tensor var_6024_cast_fp16 = mul(x = var_6021_cast_fp16_0, y = const_161_to_fp16)[name = string("op_6024_cast_fp16")]; tensor hidden_states_65_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = var_6024_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; tensor layers_4_layer_scalar_to_fp16 = const()[name = string("layers_4_layer_scalar_to_fp16"), val = tensor([0x1.fep-2])]; tensor x_289_cast_fp16 = mul(x = hidden_states_65_cast_fp16, y = layers_4_layer_scalar_to_fp16)[name = string("x_289_cast_fp16")]; int32 var_6032 = const()[name = string("op_6032"), val = int32(-1)]; fp16 const_162_promoted_to_fp16 = const()[name = string("const_162_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6038_cast_fp16 = mul(x = x_289_cast_fp16, y = const_162_promoted_to_fp16)[name = string("op_6038_cast_fp16")]; bool input_219_interleave_0 = const()[name = string("input_219_interleave_0"), val = bool(false)]; tensor input_219_cast_fp16 = concat(axis = var_6032, interleave = input_219_interleave_0, values = (x_289_cast_fp16, var_6038_cast_fp16))[name = string("input_219_cast_fp16")]; tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; fp16 var_6030_to_fp16 = const()[name = string("op_6030_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_6030_to_fp16, x = input_219_cast_fp16)[name = string("normed_281_cast_fp16")]; tensor var_6043_split_sizes_0 = const()[name = string("op_6043_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_6043_axis_0 = const()[name = string("op_6043_axis_0"), val = int32(-1)]; tensor var_6043_cast_fp16_0, tensor var_6043_cast_fp16_1 = split(axis = var_6043_axis_0, split_sizes = var_6043_split_sizes_0, x = normed_281_cast_fp16)[name = string("op_6043_cast_fp16")]; tensor const_163_to_fp16 = const()[name = string("const_163_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2259877376)))]; tensor var_6046_cast_fp16 = mul(x = var_6043_cast_fp16_0, y = const_163_to_fp16)[name = string("op_6046_cast_fp16")]; tensor var_6054 = const()[name = string("op_6054"), val = tensor([0, 2, 1])]; tensor var_6057_axes_0 = const()[name = string("op_6057_axes_0"), val = tensor([2])]; tensor var_6055_cast_fp16 = transpose(perm = var_6054, x = var_6046_cast_fp16)[name = string("transpose_291")]; tensor var_6057_cast_fp16 = expand_dims(axes = var_6057_axes_0, x = var_6055_cast_fp16)[name = string("op_6057_cast_fp16")]; string var_6073_pad_type_0 = const()[name = string("op_6073_pad_type_0"), val = string("valid")]; tensor var_6073_strides_0 = const()[name = string("op_6073_strides_0"), val = tensor([1, 1])]; tensor var_6073_pad_0 = const()[name = string("op_6073_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6073_dilations_0 = const()[name = string("op_6073_dilations_0"), val = tensor([1, 1])]; int32 var_6073_groups_0 = const()[name = string("op_6073_groups_0"), val = int32(1)]; tensor var_6073 = conv(dilations = var_6073_dilations_0, groups = var_6073_groups_0, pad = var_6073_pad_0, pad_type = var_6073_pad_type_0, strides = var_6073_strides_0, weight = layers_5_self_attn_q_proj_weight_palettized, x = var_6057_cast_fp16)[name = string("op_6073")]; tensor var_6078 = const()[name = string("op_6078"), val = tensor([1, 8, 256, 1])]; tensor var_6079 = reshape(shape = var_6078, x = var_6073)[name = string("op_6079")]; tensor var_6084 = const()[name = string("op_6084"), val = tensor([0, 1, 3, 2])]; tensor var_6094 = const()[name = string("op_6094"), val = tensor([1, 8, 256])]; tensor var_6085 = transpose(perm = var_6084, x = var_6079)[name = string("transpose_290")]; tensor x_293 = reshape(shape = var_6094, x = var_6085)[name = string("x_293")]; int32 var_6100 = const()[name = string("op_6100"), val = int32(-1)]; fp16 const_164_promoted_to_fp16 = const()[name = string("const_164_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6106_cast_fp16 = mul(x = x_293, y = const_164_promoted_to_fp16)[name = string("op_6106_cast_fp16")]; bool input_223_interleave_0 = const()[name = string("input_223_interleave_0"), val = bool(false)]; tensor input_223_cast_fp16 = concat(axis = var_6100, interleave = input_223_interleave_0, values = (x_293, var_6106_cast_fp16))[name = string("input_223_cast_fp16")]; tensor normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor([-1])]; fp16 var_6098_to_fp16 = const()[name = string("op_6098_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_285_cast_fp16 = layer_norm(axes = normed_285_axes_0, epsilon = var_6098_to_fp16, x = input_223_cast_fp16)[name = string("normed_285_cast_fp16")]; tensor var_6111_split_sizes_0 = const()[name = string("op_6111_split_sizes_0"), val = tensor([256, 256])]; int32 var_6111_axis_0 = const()[name = string("op_6111_axis_0"), val = int32(-1)]; tensor var_6111_cast_fp16_0, tensor var_6111_cast_fp16_1 = split(axis = var_6111_axis_0, split_sizes = var_6111_split_sizes_0, x = normed_285_cast_fp16)[name = string("op_6111_cast_fp16")]; tensor const_165_to_fp16 = const()[name = string("const_165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2259880512)))]; tensor var_6114_cast_fp16 = mul(x = var_6111_cast_fp16_0, y = const_165_to_fp16)[name = string("op_6114_cast_fp16")]; tensor var_6120 = const()[name = string("op_6120"), val = tensor([1, 8, 1, 256])]; tensor q_43 = reshape(shape = var_6120, x = var_6114_cast_fp16)[name = string("q_43")]; tensor var_6122 = mul(x = q_43, y = cos_1)[name = string("op_6122")]; tensor var_6123_split_sizes_0 = const()[name = string("op_6123_split_sizes_0"), val = tensor([128, 128])]; int32 var_6123_axis_0 = const()[name = string("op_6123_axis_0"), val = int32(-1)]; tensor var_6123_0, tensor var_6123_1 = split(axis = var_6123_axis_0, split_sizes = var_6123_split_sizes_0, x = q_43)[name = string("op_6123")]; fp16 const_166_promoted = const()[name = string("const_166_promoted"), val = fp16(-0x1p+0)]; tensor var_6125 = mul(x = var_6123_1, y = const_166_promoted)[name = string("op_6125")]; int32 var_6127 = const()[name = string("op_6127"), val = int32(-1)]; bool var_6128_interleave_0 = const()[name = string("op_6128_interleave_0"), val = bool(false)]; tensor var_6128 = concat(axis = var_6127, interleave = var_6128_interleave_0, values = (var_6125, var_6123_0))[name = string("op_6128")]; tensor var_6129 = mul(x = var_6128, y = sin_1)[name = string("op_6129")]; tensor q_47 = add(x = var_6122, y = var_6129)[name = string("q_47")]; string var_6142_pad_type_0 = const()[name = string("op_6142_pad_type_0"), val = string("valid")]; tensor var_6142_strides_0 = const()[name = string("op_6142_strides_0"), val = tensor([1, 1])]; tensor var_6142_pad_0 = const()[name = string("op_6142_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6142_dilations_0 = const()[name = string("op_6142_dilations_0"), val = tensor([1, 1])]; int32 var_6142_groups_0 = const()[name = string("op_6142_groups_0"), val = int32(1)]; tensor var_6142 = conv(dilations = var_6142_dilations_0, groups = var_6142_groups_0, pad = var_6142_pad_0, pad_type = var_6142_pad_type_0, strides = var_6142_strides_0, weight = layers_5_self_attn_k_proj_weight_palettized, x = var_6057_cast_fp16)[name = string("op_6142")]; tensor var_6147 = const()[name = string("op_6147"), val = tensor([1, 1, 256, 1])]; tensor var_6148 = reshape(shape = var_6147, x = var_6142)[name = string("op_6148")]; tensor var_6153 = const()[name = string("op_6153"), val = tensor([0, 1, 3, 2])]; string var_6170_pad_type_0 = const()[name = string("op_6170_pad_type_0"), val = string("valid")]; tensor var_6170_strides_0 = const()[name = string("op_6170_strides_0"), val = tensor([1, 1])]; tensor var_6170_pad_0 = const()[name = string("op_6170_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6170_dilations_0 = const()[name = string("op_6170_dilations_0"), val = tensor([1, 1])]; int32 var_6170_groups_0 = const()[name = string("op_6170_groups_0"), val = int32(1)]; tensor var_6170 = conv(dilations = var_6170_dilations_0, groups = var_6170_groups_0, pad = var_6170_pad_0, pad_type = var_6170_pad_type_0, strides = var_6170_strides_0, weight = layers_5_self_attn_v_proj_weight_palettized, x = var_6057_cast_fp16)[name = string("op_6170")]; tensor var_6175 = const()[name = string("op_6175"), val = tensor([1, 1, 256, 1])]; tensor var_6176 = reshape(shape = var_6175, x = var_6170)[name = string("op_6176")]; tensor var_6181 = const()[name = string("op_6181"), val = tensor([0, 1, 3, 2])]; tensor var_6191 = const()[name = string("op_6191"), val = tensor([1, 1, 256])]; tensor var_6154 = transpose(perm = var_6153, x = var_6148)[name = string("transpose_289")]; tensor x_297 = reshape(shape = var_6191, x = var_6154)[name = string("x_297")]; int32 var_6197 = const()[name = string("op_6197"), val = int32(-1)]; fp16 const_167_promoted_to_fp16 = const()[name = string("const_167_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6203_cast_fp16 = mul(x = x_297, y = const_167_promoted_to_fp16)[name = string("op_6203_cast_fp16")]; bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)]; tensor input_225_cast_fp16 = concat(axis = var_6197, interleave = input_225_interleave_0, values = (x_297, var_6203_cast_fp16))[name = string("input_225_cast_fp16")]; tensor normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor([-1])]; fp16 var_6195_to_fp16 = const()[name = string("op_6195_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_6195_to_fp16, x = input_225_cast_fp16)[name = string("normed_289_cast_fp16")]; tensor var_6208_split_sizes_0 = const()[name = string("op_6208_split_sizes_0"), val = tensor([256, 256])]; int32 var_6208_axis_0 = const()[name = string("op_6208_axis_0"), val = int32(-1)]; tensor var_6208_cast_fp16_0, tensor var_6208_cast_fp16_1 = split(axis = var_6208_axis_0, split_sizes = var_6208_split_sizes_0, x = normed_289_cast_fp16)[name = string("op_6208_cast_fp16")]; tensor var_6211_cast_fp16 = mul(x = var_6208_cast_fp16_0, y = const_80_to_fp16)[name = string("op_6211_cast_fp16")]; tensor var_6217 = const()[name = string("op_6217"), val = tensor([1, 1, 1, 256])]; tensor q_45 = reshape(shape = var_6217, x = var_6211_cast_fp16)[name = string("q_45")]; fp16 var_6224_promoted_to_fp16 = const()[name = string("op_6224_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6182 = transpose(perm = var_6181, x = var_6176)[name = string("transpose_288")]; tensor var_6225_cast_fp16 = pow(x = var_6182, y = var_6224_promoted_to_fp16)[name = string("op_6225_cast_fp16")]; tensor var_6230_axes_0 = const()[name = string("op_6230_axes_0"), val = tensor([-1])]; bool var_6230_keep_dims_0 = const()[name = string("op_6230_keep_dims_0"), val = bool(true)]; tensor var_6230_cast_fp16 = reduce_mean(axes = var_6230_axes_0, keep_dims = var_6230_keep_dims_0, x = var_6225_cast_fp16)[name = string("op_6230_cast_fp16")]; fp16 var_6232_to_fp16 = const()[name = string("op_6232_to_fp16"), val = fp16(0x1.1p-20)]; tensor mean_sq_11_cast_fp16 = add(x = var_6230_cast_fp16, y = var_6232_to_fp16)[name = string("mean_sq_11_cast_fp16")]; fp16 var_6239_to_fp16 = const()[name = string("op_6239_to_fp16"), val = fp16(-0x1p-1)]; tensor var_6240_cast_fp16 = pow(x = mean_sq_11_cast_fp16, y = var_6239_to_fp16)[name = string("op_6240_cast_fp16")]; tensor var_6241_cast_fp16 = mul(x = var_6182, y = var_6240_cast_fp16)[name = string("op_6241_cast_fp16")]; tensor var_6247 = mul(x = q_45, y = cos_1)[name = string("op_6247")]; tensor var_6248_split_sizes_0 = const()[name = string("op_6248_split_sizes_0"), val = tensor([128, 128])]; int32 var_6248_axis_0 = const()[name = string("op_6248_axis_0"), val = int32(-1)]; tensor var_6248_0, tensor var_6248_1 = split(axis = var_6248_axis_0, split_sizes = var_6248_split_sizes_0, x = q_45)[name = string("op_6248")]; fp16 const_169_promoted = const()[name = string("const_169_promoted"), val = fp16(-0x1p+0)]; tensor var_6250 = mul(x = var_6248_1, y = const_169_promoted)[name = string("op_6250")]; int32 var_6252 = const()[name = string("op_6252"), val = int32(-1)]; bool var_6253_interleave_0 = const()[name = string("op_6253_interleave_0"), val = bool(false)]; tensor var_6253 = concat(axis = var_6252, interleave = var_6253_interleave_0, values = (var_6250, var_6248_0))[name = string("op_6253")]; tensor var_6254 = mul(x = var_6253, y = sin_1)[name = string("op_6254")]; tensor input_227 = add(x = var_6247, y = var_6254)[name = string("input_227")]; tensor var_6259_begin_0 = const()[name = string("op_6259_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_6259_end_0 = const()[name = string("op_6259_end_0"), val = tensor([6, 1, 512, 512])]; tensor var_6259_end_mask_0 = const()[name = string("op_6259_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6259_squeeze_mask_0 = const()[name = string("op_6259_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_6259_cast_fp16 = slice_by_index(begin = var_6259_begin_0, end = var_6259_end_0, end_mask = var_6259_end_mask_0, squeeze_mask = var_6259_squeeze_mask_0, x = coreml_update_state_39)[name = string("op_6259_cast_fp16")]; tensor K_cache_11_axes_0 = const()[name = string("K_cache_11_axes_0"), val = tensor([0])]; tensor K_cache_11_cast_fp16 = expand_dims(axes = K_cache_11_axes_0, x = var_6259_cast_fp16)[name = string("K_cache_11_cast_fp16")]; tensor var_6264_begin_0 = const()[name = string("op_6264_begin_0"), val = tensor([40, 0, 0, 0])]; tensor var_6264_end_0 = const()[name = string("op_6264_end_0"), val = tensor([41, 1, 512, 512])]; tensor var_6264_end_mask_0 = const()[name = string("op_6264_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6264_squeeze_mask_0 = const()[name = string("op_6264_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_6264_cast_fp16 = slice_by_index(begin = var_6264_begin_0, end = var_6264_end_0, end_mask = var_6264_end_mask_0, squeeze_mask = var_6264_squeeze_mask_0, x = coreml_update_state_39)[name = string("op_6264_cast_fp16")]; tensor V_cache_11_axes_0 = const()[name = string("V_cache_11_axes_0"), val = tensor([0])]; tensor V_cache_11_cast_fp16 = expand_dims(axes = V_cache_11_axes_0, x = var_6264_cast_fp16)[name = string("V_cache_11_cast_fp16")]; tensor k_padded_9_pad_0 = const()[name = string("k_padded_9_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string k_padded_9_mode_0 = const()[name = string("k_padded_9_mode_0"), val = string("constant")]; fp16 const_170_to_fp16 = const()[name = string("const_170_to_fp16"), val = fp16(0x0p+0)]; tensor k_padded_9_cast_fp16 = pad(constant_val = const_170_to_fp16, mode = k_padded_9_mode_0, pad = k_padded_9_pad_0, x = input_227)[name = string("k_padded_9_cast_fp16")]; tensor v_padded_9_pad_0 = const()[name = string("v_padded_9_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string v_padded_9_mode_0 = const()[name = string("v_padded_9_mode_0"), val = string("constant")]; fp16 const_171_to_fp16 = const()[name = string("const_171_to_fp16"), val = fp16(0x0p+0)]; tensor v_padded_9_cast_fp16 = pad(constant_val = const_171_to_fp16, mode = v_padded_9_mode_0, pad = v_padded_9_pad_0, x = var_6241_cast_fp16)[name = string("v_padded_9_cast_fp16")]; tensor var_6282_cast_fp16 = mul(x = K_cache_11_cast_fp16, y = var_3515_cast_fp16)[name = string("op_6282_cast_fp16")]; tensor var_6283_reps_0 = const()[name = string("op_6283_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_6283_cast_fp16 = tile(reps = var_6283_reps_0, x = k_padded_9_cast_fp16)[name = string("op_6283_cast_fp16")]; tensor var_6284_cast_fp16 = mul(x = var_6283_cast_fp16, y = update_mask)[name = string("op_6284_cast_fp16")]; tensor K_new_11_cast_fp16 = add(x = var_6282_cast_fp16, y = var_6284_cast_fp16)[name = string("K_new_11_cast_fp16")]; tensor var_6290_cast_fp16 = mul(x = V_cache_11_cast_fp16, y = var_3515_cast_fp16)[name = string("op_6290_cast_fp16")]; tensor var_6291_reps_0 = const()[name = string("op_6291_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_6291_cast_fp16 = tile(reps = var_6291_reps_0, x = v_padded_9_cast_fp16)[name = string("op_6291_cast_fp16")]; tensor var_6292_cast_fp16 = mul(x = var_6291_cast_fp16, y = update_mask)[name = string("op_6292_cast_fp16")]; tensor V_new_11_cast_fp16 = add(x = var_6290_cast_fp16, y = var_6292_cast_fp16)[name = string("V_new_11_cast_fp16")]; tensor var_6296_axes_0 = const()[name = string("op_6296_axes_0"), val = tensor([0])]; tensor var_6296_cast_fp16 = squeeze(axes = var_6296_axes_0, x = K_new_11_cast_fp16)[name = string("op_6296_cast_fp16")]; tensor concat_40 = const()[name = string("concat_40"), val = tensor([5, 0, 0, 0])]; tensor concat_41 = const()[name = string("concat_41"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_40, begin_mask = kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_41, end_mask = kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_11_stride_0, update = var_6296_cast_fp16, x = coreml_update_state_39)[name = string("kv_cache_0_internal_tensor_assign_11_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_11_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_40_write_state")]; tensor coreml_update_state_40 = read_state(input = kv_cache_0)[name = string("coreml_update_state_40")]; tensor var_6303_axes_0 = const()[name = string("op_6303_axes_0"), val = tensor([0])]; tensor var_6303_cast_fp16 = squeeze(axes = var_6303_axes_0, x = V_new_11_cast_fp16)[name = string("op_6303_cast_fp16")]; tensor concat_42 = const()[name = string("concat_42"), val = tensor([40, 0, 0, 0])]; tensor concat_43 = const()[name = string("concat_43"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_42, begin_mask = kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_43, end_mask = kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_12_stride_0, update = var_6303_cast_fp16, x = coreml_update_state_40)[name = string("kv_cache_0_internal_tensor_assign_12_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_12_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_41_write_state")]; tensor coreml_update_state_41 = read_state(input = kv_cache_0)[name = string("coreml_update_state_41")]; tensor K_for_attn_11_begin_0 = const()[name = string("K_for_attn_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor K_for_attn_11_end_0 = const()[name = string("K_for_attn_11_end_0"), val = tensor([1, 1, 512, 256])]; tensor K_for_attn_11_end_mask_0 = const()[name = string("K_for_attn_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor K_for_attn_11_cast_fp16 = slice_by_index(begin = K_for_attn_11_begin_0, end = K_for_attn_11_end_0, end_mask = K_for_attn_11_end_mask_0, x = K_new_11_cast_fp16)[name = string("K_for_attn_11_cast_fp16")]; tensor V_for_attn_11_begin_0 = const()[name = string("V_for_attn_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor V_for_attn_11_end_0 = const()[name = string("V_for_attn_11_end_0"), val = tensor([1, 1, 512, 256])]; tensor V_for_attn_11_end_mask_0 = const()[name = string("V_for_attn_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor V_for_attn_11_cast_fp16 = slice_by_index(begin = V_for_attn_11_begin_0, end = V_for_attn_11_end_0, end_mask = V_for_attn_11_end_mask_0, x = V_new_11_cast_fp16)[name = string("V_for_attn_11_cast_fp16")]; tensor transpose_20_perm_0 = const()[name = string("transpose_20_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_10_reps_0 = const()[name = string("tile_10_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_20_cast_fp16 = transpose(perm = transpose_20_perm_0, x = K_for_attn_11_cast_fp16)[name = string("transpose_287")]; tensor tile_10_cast_fp16 = tile(reps = tile_10_reps_0, x = transpose_20_cast_fp16)[name = string("tile_10_cast_fp16")]; tensor concat_44 = const()[name = string("concat_44"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_20_cast_fp16 = reshape(shape = concat_44, x = tile_10_cast_fp16)[name = string("reshape_20_cast_fp16")]; tensor transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_45 = const()[name = string("concat_45"), val = tensor([-1, 1, 512, 256])]; tensor transpose_21_cast_fp16 = transpose(perm = transpose_21_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_286")]; tensor reshape_21_cast_fp16 = reshape(shape = concat_45, x = transpose_21_cast_fp16)[name = string("reshape_21_cast_fp16")]; tensor transpose_145_perm_0 = const()[name = string("transpose_145_perm_0"), val = tensor([1, 0, -1, -2])]; tensor transpose_22_perm_0 = const()[name = string("transpose_22_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_11_reps_0 = const()[name = string("tile_11_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_22_cast_fp16 = transpose(perm = transpose_22_perm_0, x = V_for_attn_11_cast_fp16)[name = string("transpose_285")]; tensor tile_11_cast_fp16 = tile(reps = tile_11_reps_0, x = transpose_22_cast_fp16)[name = string("tile_11_cast_fp16")]; tensor concat_46 = const()[name = string("concat_46"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_22_cast_fp16 = reshape(shape = concat_46, x = tile_11_cast_fp16)[name = string("reshape_22_cast_fp16")]; tensor transpose_23_perm_0 = const()[name = string("transpose_23_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_47 = const()[name = string("concat_47"), val = tensor([-1, 1, 512, 256])]; tensor transpose_23_cast_fp16 = transpose(perm = transpose_23_perm_0, x = reshape_22_cast_fp16)[name = string("transpose_284")]; tensor reshape_23_cast_fp16 = reshape(shape = concat_47, x = transpose_23_cast_fp16)[name = string("reshape_23_cast_fp16")]; tensor V_expanded_11_perm_0 = const()[name = string("V_expanded_11_perm_0"), val = tensor([1, 0, -2, -1])]; bool var_6340_transpose_x_0 = const()[name = string("op_6340_transpose_x_0"), val = bool(false)]; bool var_6340_transpose_y_0 = const()[name = string("op_6340_transpose_y_0"), val = bool(false)]; tensor transpose_145_cast_fp16 = transpose(perm = transpose_145_perm_0, x = reshape_21_cast_fp16)[name = string("transpose_283")]; tensor var_6340_cast_fp16 = matmul(transpose_x = var_6340_transpose_x_0, transpose_y = var_6340_transpose_y_0, x = q_47, y = transpose_145_cast_fp16)[name = string("op_6340_cast_fp16")]; tensor attn_weights_33_cast_fp16 = add(x = var_6340_cast_fp16, y = causal_mask)[name = string("attn_weights_33_cast_fp16")]; int32 var_6350 = const()[name = string("op_6350"), val = int32(-1)]; tensor var_6352_cast_fp16 = softmax(axis = var_6350, x = attn_weights_33_cast_fp16)[name = string("op_6352_cast_fp16")]; bool var_6368_transpose_x_0 = const()[name = string("op_6368_transpose_x_0"), val = bool(false)]; bool var_6368_transpose_y_0 = const()[name = string("op_6368_transpose_y_0"), val = bool(false)]; tensor V_expanded_11_cast_fp16 = transpose(perm = V_expanded_11_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_282")]; tensor var_6368_cast_fp16 = matmul(transpose_x = var_6368_transpose_x_0, transpose_y = var_6368_transpose_y_0, x = var_6352_cast_fp16, y = V_expanded_11_cast_fp16)[name = string("op_6368_cast_fp16")]; tensor var_6378 = const()[name = string("op_6378"), val = tensor([0, 2, 1, 3])]; tensor var_6385 = const()[name = string("op_6385"), val = tensor([1, 1, -1])]; tensor var_6379 = transpose(perm = var_6378, x = var_6368_cast_fp16)[name = string("transpose_281")]; tensor attn_output_33 = reshape(shape = var_6385, x = var_6379)[name = string("attn_output_33")]; tensor var_6390 = const()[name = string("op_6390"), val = tensor([0, 2, 1])]; tensor squeeze_5_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2259881088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2261454016))))[name = string("squeeze_5_palettized")]; string var_6406_pad_type_0 = const()[name = string("op_6406_pad_type_0"), val = string("valid")]; int32 var_6406_groups_0 = const()[name = string("op_6406_groups_0"), val = int32(1)]; tensor var_6406_strides_0 = const()[name = string("op_6406_strides_0"), val = tensor([1])]; tensor var_6406_pad_0 = const()[name = string("op_6406_pad_0"), val = tensor([0, 0])]; tensor var_6406_dilations_0 = const()[name = string("op_6406_dilations_0"), val = tensor([1])]; tensor var_6391 = transpose(perm = var_6390, x = attn_output_33)[name = string("transpose_280")]; tensor var_6406 = conv(dilations = var_6406_dilations_0, groups = var_6406_groups_0, pad = var_6406_pad_0, pad_type = var_6406_pad_type_0, strides = var_6406_strides_0, weight = squeeze_5_palettized, x = var_6391)[name = string("op_6406")]; tensor var_6410 = const()[name = string("op_6410"), val = tensor([0, 2, 1])]; int32 var_6416 = const()[name = string("op_6416"), val = int32(-1)]; fp16 const_172_promoted_to_fp16 = const()[name = string("const_172_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_303 = transpose(perm = var_6410, x = var_6406)[name = string("transpose_279")]; tensor var_6422_cast_fp16 = mul(x = x_303, y = const_172_promoted_to_fp16)[name = string("op_6422_cast_fp16")]; bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)]; tensor input_233_cast_fp16 = concat(axis = var_6416, interleave = input_233_interleave_0, values = (x_303, var_6422_cast_fp16))[name = string("input_233_cast_fp16")]; tensor normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor([-1])]; fp16 var_6414_to_fp16 = const()[name = string("op_6414_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_6414_to_fp16, x = input_233_cast_fp16)[name = string("normed_293_cast_fp16")]; tensor var_6427_split_sizes_0 = const()[name = string("op_6427_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_6427_axis_0 = const()[name = string("op_6427_axis_0"), val = int32(-1)]; tensor var_6427_cast_fp16_0, tensor var_6427_cast_fp16_1 = split(axis = var_6427_axis_0, split_sizes = var_6427_split_sizes_0, x = normed_293_cast_fp16)[name = string("op_6427_cast_fp16")]; tensor const_173_to_fp16 = const()[name = string("const_173_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2261455616)))]; tensor var_6430_cast_fp16 = mul(x = var_6427_cast_fp16_0, y = const_173_to_fp16)[name = string("op_6430_cast_fp16")]; tensor x_307_cast_fp16 = add(x = x_289_cast_fp16, y = var_6430_cast_fp16)[name = string("x_307_cast_fp16")]; int32 var_6437 = const()[name = string("op_6437"), val = int32(-1)]; fp16 const_174_promoted_to_fp16 = const()[name = string("const_174_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6443_cast_fp16 = mul(x = x_307_cast_fp16, y = const_174_promoted_to_fp16)[name = string("op_6443_cast_fp16")]; bool input_235_interleave_0 = const()[name = string("input_235_interleave_0"), val = bool(false)]; tensor input_235_cast_fp16 = concat(axis = var_6437, interleave = input_235_interleave_0, values = (x_307_cast_fp16, var_6443_cast_fp16))[name = string("input_235_cast_fp16")]; tensor normed_297_axes_0 = const()[name = string("normed_297_axes_0"), val = tensor([-1])]; fp16 var_6435_to_fp16 = const()[name = string("op_6435_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_6435_to_fp16, x = input_235_cast_fp16)[name = string("normed_297_cast_fp16")]; tensor var_6448_split_sizes_0 = const()[name = string("op_6448_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_6448_axis_0 = const()[name = string("op_6448_axis_0"), val = int32(-1)]; tensor var_6448_cast_fp16_0, tensor var_6448_cast_fp16_1 = split(axis = var_6448_axis_0, split_sizes = var_6448_split_sizes_0, x = normed_297_cast_fp16)[name = string("op_6448_cast_fp16")]; tensor const_175_to_fp16 = const()[name = string("const_175_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2261458752)))]; tensor var_6451_cast_fp16 = mul(x = var_6448_cast_fp16_0, y = const_175_to_fp16)[name = string("op_6451_cast_fp16")]; tensor var_6464 = const()[name = string("op_6464"), val = tensor([0, 2, 1])]; tensor input_237_axes_0 = const()[name = string("input_237_axes_0"), val = tensor([2])]; tensor var_6465 = transpose(perm = var_6464, x = var_6451_cast_fp16)[name = string("transpose_278")]; tensor input_237 = expand_dims(axes = input_237_axes_0, x = var_6465)[name = string("input_237")]; string gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")]; tensor gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor([1, 1])]; tensor gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor([1, 1])]; int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)]; tensor gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, strides = gate_21_strides_0, weight = layers_5_mlp_gate_proj_weight_palettized, x = input_237)[name = string("gate_21")]; string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")]; tensor up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor([1, 1])]; tensor up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor([1, 1])]; int32 up_11_groups_0 = const()[name = string("up_11_groups_0"), val = int32(1)]; tensor up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = layers_5_mlp_up_proj_weight_palettized, x = input_237)[name = string("up_11")]; string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")]; tensor input_239 = mul(x = gate_23, y = up_11)[name = string("input_239")]; string mlp_out_11_pad_type_0 = const()[name = string("mlp_out_11_pad_type_0"), val = string("valid")]; tensor mlp_out_11_strides_0 = const()[name = string("mlp_out_11_strides_0"), val = tensor([1, 1])]; tensor mlp_out_11_pad_0 = const()[name = string("mlp_out_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_11_dilations_0 = const()[name = string("mlp_out_11_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_11_groups_0 = const()[name = string("mlp_out_11_groups_0"), val = int32(1)]; tensor mlp_out_11 = conv(dilations = mlp_out_11_dilations_0, groups = mlp_out_11_groups_0, pad = mlp_out_11_pad_0, pad_type = mlp_out_11_pad_type_0, strides = mlp_out_11_strides_0, weight = layers_5_mlp_down_proj_weight_palettized, x = input_239)[name = string("mlp_out_11")]; tensor var_6505_axes_0 = const()[name = string("op_6505_axes_0"), val = tensor([2])]; tensor var_6505 = squeeze(axes = var_6505_axes_0, x = mlp_out_11)[name = string("op_6505")]; tensor var_6509 = const()[name = string("op_6509"), val = tensor([0, 2, 1])]; int32 var_6515 = const()[name = string("op_6515"), val = int32(-1)]; fp16 const_176_promoted_to_fp16 = const()[name = string("const_176_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_311 = transpose(perm = var_6509, x = var_6505)[name = string("transpose_277")]; tensor var_6521_cast_fp16 = mul(x = x_311, y = const_176_promoted_to_fp16)[name = string("op_6521_cast_fp16")]; bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; tensor input_241_cast_fp16 = concat(axis = var_6515, interleave = input_241_interleave_0, values = (x_311, var_6521_cast_fp16))[name = string("input_241_cast_fp16")]; tensor normed_301_axes_0 = const()[name = string("normed_301_axes_0"), val = tensor([-1])]; fp16 var_6513_to_fp16 = const()[name = string("op_6513_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_301_cast_fp16 = layer_norm(axes = normed_301_axes_0, epsilon = var_6513_to_fp16, x = input_241_cast_fp16)[name = string("normed_301_cast_fp16")]; tensor var_6526_split_sizes_0 = const()[name = string("op_6526_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_6526_axis_0 = const()[name = string("op_6526_axis_0"), val = int32(-1)]; tensor var_6526_cast_fp16_0, tensor var_6526_cast_fp16_1 = split(axis = var_6526_axis_0, split_sizes = var_6526_split_sizes_0, x = normed_301_cast_fp16)[name = string("op_6526_cast_fp16")]; tensor const_177_to_fp16 = const()[name = string("const_177_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2261461888)))]; tensor var_6529_cast_fp16 = mul(x = var_6526_cast_fp16_0, y = const_177_to_fp16)[name = string("op_6529_cast_fp16")]; tensor hidden_states_73_cast_fp16 = add(x = x_307_cast_fp16, y = var_6529_cast_fp16)[name = string("hidden_states_73_cast_fp16")]; tensor per_layer_slice_11_begin_0 = const()[name = string("per_layer_slice_11_begin_0"), val = tensor([0, 0, 1280])]; tensor per_layer_slice_11_end_0 = const()[name = string("per_layer_slice_11_end_0"), val = tensor([1, 1, 1536])]; tensor per_layer_slice_11_end_mask_0 = const()[name = string("per_layer_slice_11_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_11 = slice_by_index(begin = per_layer_slice_11_begin_0, end = per_layer_slice_11_end_0, end_mask = per_layer_slice_11_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_11")]; tensor gated_21 = linear(bias = linear_1_bias_0, weight = layers_5_per_layer_input_gate_weight_palettized, x = hidden_states_73_cast_fp16)[name = string("linear_11")]; string gated_23_mode_0 = const()[name = string("gated_23_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_23 = gelu(mode = gated_23_mode_0, x = gated_21)[name = string("gated_23")]; tensor input_245 = mul(x = gated_23, y = per_layer_slice_11)[name = string("input_245")]; tensor x_315 = linear(bias = linear_2_bias_0, weight = layers_5_per_layer_projection_weight_palettized, x = input_245)[name = string("linear_12")]; int32 var_6566 = const()[name = string("op_6566"), val = int32(-1)]; fp16 const_178_promoted_to_fp16 = const()[name = string("const_178_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6572_cast_fp16 = mul(x = x_315, y = const_178_promoted_to_fp16)[name = string("op_6572_cast_fp16")]; bool input_247_interleave_0 = const()[name = string("input_247_interleave_0"), val = bool(false)]; tensor input_247_cast_fp16 = concat(axis = var_6566, interleave = input_247_interleave_0, values = (x_315, var_6572_cast_fp16))[name = string("input_247_cast_fp16")]; tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; fp16 var_6564_to_fp16 = const()[name = string("op_6564_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_6564_to_fp16, x = input_247_cast_fp16)[name = string("normed_305_cast_fp16")]; tensor var_6577_split_sizes_0 = const()[name = string("op_6577_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_6577_axis_0 = const()[name = string("op_6577_axis_0"), val = int32(-1)]; tensor var_6577_cast_fp16_0, tensor var_6577_cast_fp16_1 = split(axis = var_6577_axis_0, split_sizes = var_6577_split_sizes_0, x = normed_305_cast_fp16)[name = string("op_6577_cast_fp16")]; tensor const_179_to_fp16 = const()[name = string("const_179_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2261465024)))]; tensor var_6580_cast_fp16 = mul(x = var_6577_cast_fp16_0, y = const_179_to_fp16)[name = string("op_6580_cast_fp16")]; tensor hidden_states_77_cast_fp16 = add(x = hidden_states_73_cast_fp16, y = var_6580_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor layers_5_layer_scalar_to_fp16 = const()[name = string("layers_5_layer_scalar_to_fp16"), val = tensor([0x1.46p-1])]; tensor x_319_cast_fp16 = mul(x = hidden_states_77_cast_fp16, y = layers_5_layer_scalar_to_fp16)[name = string("x_319_cast_fp16")]; int32 var_6588 = const()[name = string("op_6588"), val = int32(-1)]; fp16 const_180_promoted_to_fp16 = const()[name = string("const_180_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6594_cast_fp16 = mul(x = x_319_cast_fp16, y = const_180_promoted_to_fp16)[name = string("op_6594_cast_fp16")]; bool input_249_interleave_0 = const()[name = string("input_249_interleave_0"), val = bool(false)]; tensor input_249_cast_fp16 = concat(axis = var_6588, interleave = input_249_interleave_0, values = (x_319_cast_fp16, var_6594_cast_fp16))[name = string("input_249_cast_fp16")]; tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; fp16 var_6586_to_fp16 = const()[name = string("op_6586_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_6586_to_fp16, x = input_249_cast_fp16)[name = string("normed_309_cast_fp16")]; tensor var_6599_split_sizes_0 = const()[name = string("op_6599_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_6599_axis_0 = const()[name = string("op_6599_axis_0"), val = int32(-1)]; tensor var_6599_cast_fp16_0, tensor var_6599_cast_fp16_1 = split(axis = var_6599_axis_0, split_sizes = var_6599_split_sizes_0, x = normed_309_cast_fp16)[name = string("op_6599_cast_fp16")]; tensor const_181_to_fp16 = const()[name = string("const_181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2261468160)))]; tensor var_6602_cast_fp16 = mul(x = var_6599_cast_fp16_0, y = const_181_to_fp16)[name = string("op_6602_cast_fp16")]; tensor var_6610 = const()[name = string("op_6610"), val = tensor([0, 2, 1])]; tensor var_6613_axes_0 = const()[name = string("op_6613_axes_0"), val = tensor([2])]; tensor var_6611_cast_fp16 = transpose(perm = var_6610, x = var_6602_cast_fp16)[name = string("transpose_276")]; tensor var_6613_cast_fp16 = expand_dims(axes = var_6613_axes_0, x = var_6611_cast_fp16)[name = string("op_6613_cast_fp16")]; string var_6629_pad_type_0 = const()[name = string("op_6629_pad_type_0"), val = string("valid")]; tensor var_6629_strides_0 = const()[name = string("op_6629_strides_0"), val = tensor([1, 1])]; tensor var_6629_pad_0 = const()[name = string("op_6629_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6629_dilations_0 = const()[name = string("op_6629_dilations_0"), val = tensor([1, 1])]; int32 var_6629_groups_0 = const()[name = string("op_6629_groups_0"), val = int32(1)]; tensor var_6629 = conv(dilations = var_6629_dilations_0, groups = var_6629_groups_0, pad = var_6629_pad_0, pad_type = var_6629_pad_type_0, strides = var_6629_strides_0, weight = layers_6_self_attn_q_proj_weight_palettized, x = var_6613_cast_fp16)[name = string("op_6629")]; tensor var_6634 = const()[name = string("op_6634"), val = tensor([1, 8, 256, 1])]; tensor var_6635 = reshape(shape = var_6634, x = var_6629)[name = string("op_6635")]; tensor var_6640 = const()[name = string("op_6640"), val = tensor([0, 1, 3, 2])]; tensor var_6650 = const()[name = string("op_6650"), val = tensor([1, 8, 256])]; tensor var_6641 = transpose(perm = var_6640, x = var_6635)[name = string("transpose_275")]; tensor x_323 = reshape(shape = var_6650, x = var_6641)[name = string("x_323")]; int32 var_6656 = const()[name = string("op_6656"), val = int32(-1)]; fp16 const_182_promoted_to_fp16 = const()[name = string("const_182_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6662_cast_fp16 = mul(x = x_323, y = const_182_promoted_to_fp16)[name = string("op_6662_cast_fp16")]; bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; tensor input_253_cast_fp16 = concat(axis = var_6656, interleave = input_253_interleave_0, values = (x_323, var_6662_cast_fp16))[name = string("input_253_cast_fp16")]; tensor normed_313_axes_0 = const()[name = string("normed_313_axes_0"), val = tensor([-1])]; fp16 var_6654_to_fp16 = const()[name = string("op_6654_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_313_cast_fp16 = layer_norm(axes = normed_313_axes_0, epsilon = var_6654_to_fp16, x = input_253_cast_fp16)[name = string("normed_313_cast_fp16")]; tensor var_6667_split_sizes_0 = const()[name = string("op_6667_split_sizes_0"), val = tensor([256, 256])]; int32 var_6667_axis_0 = const()[name = string("op_6667_axis_0"), val = int32(-1)]; tensor var_6667_cast_fp16_0, tensor var_6667_cast_fp16_1 = split(axis = var_6667_axis_0, split_sizes = var_6667_split_sizes_0, x = normed_313_cast_fp16)[name = string("op_6667_cast_fp16")]; tensor var_6670_cast_fp16 = mul(x = var_6667_cast_fp16_0, y = const_95_to_fp16)[name = string("op_6670_cast_fp16")]; tensor var_6676 = const()[name = string("op_6676"), val = tensor([1, 8, 1, 256])]; tensor q_51 = reshape(shape = var_6676, x = var_6670_cast_fp16)[name = string("q_51")]; tensor var_6678 = mul(x = q_51, y = cos_1)[name = string("op_6678")]; tensor var_6679_split_sizes_0 = const()[name = string("op_6679_split_sizes_0"), val = tensor([128, 128])]; int32 var_6679_axis_0 = const()[name = string("op_6679_axis_0"), val = int32(-1)]; tensor var_6679_0, tensor var_6679_1 = split(axis = var_6679_axis_0, split_sizes = var_6679_split_sizes_0, x = q_51)[name = string("op_6679")]; fp16 const_184_promoted = const()[name = string("const_184_promoted"), val = fp16(-0x1p+0)]; tensor var_6681 = mul(x = var_6679_1, y = const_184_promoted)[name = string("op_6681")]; int32 var_6683 = const()[name = string("op_6683"), val = int32(-1)]; bool var_6684_interleave_0 = const()[name = string("op_6684_interleave_0"), val = bool(false)]; tensor var_6684 = concat(axis = var_6683, interleave = var_6684_interleave_0, values = (var_6681, var_6679_0))[name = string("op_6684")]; tensor var_6685 = mul(x = var_6684, y = sin_1)[name = string("op_6685")]; tensor q_55 = add(x = var_6678, y = var_6685)[name = string("q_55")]; string var_6698_pad_type_0 = const()[name = string("op_6698_pad_type_0"), val = string("valid")]; tensor var_6698_strides_0 = const()[name = string("op_6698_strides_0"), val = tensor([1, 1])]; tensor var_6698_pad_0 = const()[name = string("op_6698_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6698_dilations_0 = const()[name = string("op_6698_dilations_0"), val = tensor([1, 1])]; int32 var_6698_groups_0 = const()[name = string("op_6698_groups_0"), val = int32(1)]; tensor var_6698 = conv(dilations = var_6698_dilations_0, groups = var_6698_groups_0, pad = var_6698_pad_0, pad_type = var_6698_pad_type_0, strides = var_6698_strides_0, weight = layers_6_self_attn_k_proj_weight_palettized, x = var_6613_cast_fp16)[name = string("op_6698")]; tensor var_6703 = const()[name = string("op_6703"), val = tensor([1, 1, 256, 1])]; tensor var_6704 = reshape(shape = var_6703, x = var_6698)[name = string("op_6704")]; tensor var_6709 = const()[name = string("op_6709"), val = tensor([0, 1, 3, 2])]; string var_6726_pad_type_0 = const()[name = string("op_6726_pad_type_0"), val = string("valid")]; tensor var_6726_strides_0 = const()[name = string("op_6726_strides_0"), val = tensor([1, 1])]; tensor var_6726_pad_0 = const()[name = string("op_6726_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6726_dilations_0 = const()[name = string("op_6726_dilations_0"), val = tensor([1, 1])]; int32 var_6726_groups_0 = const()[name = string("op_6726_groups_0"), val = int32(1)]; tensor var_6726 = conv(dilations = var_6726_dilations_0, groups = var_6726_groups_0, pad = var_6726_pad_0, pad_type = var_6726_pad_type_0, strides = var_6726_strides_0, weight = layers_6_self_attn_v_proj_weight_palettized, x = var_6613_cast_fp16)[name = string("op_6726")]; tensor var_6731 = const()[name = string("op_6731"), val = tensor([1, 1, 256, 1])]; tensor var_6732 = reshape(shape = var_6731, x = var_6726)[name = string("op_6732")]; tensor var_6737 = const()[name = string("op_6737"), val = tensor([0, 1, 3, 2])]; tensor var_6747 = const()[name = string("op_6747"), val = tensor([1, 1, 256])]; tensor var_6710 = transpose(perm = var_6709, x = var_6704)[name = string("transpose_274")]; tensor x_327 = reshape(shape = var_6747, x = var_6710)[name = string("x_327")]; int32 var_6753 = const()[name = string("op_6753"), val = int32(-1)]; fp16 const_185_promoted_to_fp16 = const()[name = string("const_185_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6759_cast_fp16 = mul(x = x_327, y = const_185_promoted_to_fp16)[name = string("op_6759_cast_fp16")]; bool input_255_interleave_0 = const()[name = string("input_255_interleave_0"), val = bool(false)]; tensor input_255_cast_fp16 = concat(axis = var_6753, interleave = input_255_interleave_0, values = (x_327, var_6759_cast_fp16))[name = string("input_255_cast_fp16")]; tensor normed_317_axes_0 = const()[name = string("normed_317_axes_0"), val = tensor([-1])]; fp16 var_6751_to_fp16 = const()[name = string("op_6751_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_317_cast_fp16 = layer_norm(axes = normed_317_axes_0, epsilon = var_6751_to_fp16, x = input_255_cast_fp16)[name = string("normed_317_cast_fp16")]; tensor var_6764_split_sizes_0 = const()[name = string("op_6764_split_sizes_0"), val = tensor([256, 256])]; int32 var_6764_axis_0 = const()[name = string("op_6764_axis_0"), val = int32(-1)]; tensor var_6764_cast_fp16_0, tensor var_6764_cast_fp16_1 = split(axis = var_6764_axis_0, split_sizes = var_6764_split_sizes_0, x = normed_317_cast_fp16)[name = string("op_6764_cast_fp16")]; tensor var_6767_cast_fp16 = mul(x = var_6764_cast_fp16_0, y = const_98_to_fp16)[name = string("op_6767_cast_fp16")]; tensor var_6773 = const()[name = string("op_6773"), val = tensor([1, 1, 1, 256])]; tensor q_53 = reshape(shape = var_6773, x = var_6767_cast_fp16)[name = string("q_53")]; fp16 var_6780_promoted_to_fp16 = const()[name = string("op_6780_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6738 = transpose(perm = var_6737, x = var_6732)[name = string("transpose_273")]; tensor var_6781_cast_fp16 = pow(x = var_6738, y = var_6780_promoted_to_fp16)[name = string("op_6781_cast_fp16")]; tensor var_6786_axes_0 = const()[name = string("op_6786_axes_0"), val = tensor([-1])]; bool var_6786_keep_dims_0 = const()[name = string("op_6786_keep_dims_0"), val = bool(true)]; tensor var_6786_cast_fp16 = reduce_mean(axes = var_6786_axes_0, keep_dims = var_6786_keep_dims_0, x = var_6781_cast_fp16)[name = string("op_6786_cast_fp16")]; fp16 var_6788_to_fp16 = const()[name = string("op_6788_to_fp16"), val = fp16(0x1.1p-20)]; tensor mean_sq_13_cast_fp16 = add(x = var_6786_cast_fp16, y = var_6788_to_fp16)[name = string("mean_sq_13_cast_fp16")]; fp16 var_6795_to_fp16 = const()[name = string("op_6795_to_fp16"), val = fp16(-0x1p-1)]; tensor var_6796_cast_fp16 = pow(x = mean_sq_13_cast_fp16, y = var_6795_to_fp16)[name = string("op_6796_cast_fp16")]; tensor var_6797_cast_fp16 = mul(x = var_6738, y = var_6796_cast_fp16)[name = string("op_6797_cast_fp16")]; tensor var_6803 = mul(x = q_53, y = cos_1)[name = string("op_6803")]; tensor var_6804_split_sizes_0 = const()[name = string("op_6804_split_sizes_0"), val = tensor([128, 128])]; int32 var_6804_axis_0 = const()[name = string("op_6804_axis_0"), val = int32(-1)]; tensor var_6804_0, tensor var_6804_1 = split(axis = var_6804_axis_0, split_sizes = var_6804_split_sizes_0, x = q_53)[name = string("op_6804")]; fp16 const_187_promoted = const()[name = string("const_187_promoted"), val = fp16(-0x1p+0)]; tensor var_6806 = mul(x = var_6804_1, y = const_187_promoted)[name = string("op_6806")]; int32 var_6808 = const()[name = string("op_6808"), val = int32(-1)]; bool var_6809_interleave_0 = const()[name = string("op_6809_interleave_0"), val = bool(false)]; tensor var_6809 = concat(axis = var_6808, interleave = var_6809_interleave_0, values = (var_6806, var_6804_0))[name = string("op_6809")]; tensor var_6810 = mul(x = var_6809, y = sin_1)[name = string("op_6810")]; tensor input_257 = add(x = var_6803, y = var_6810)[name = string("input_257")]; tensor var_6815_begin_0 = const()[name = string("op_6815_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_6815_end_0 = const()[name = string("op_6815_end_0"), val = tensor([7, 1, 512, 512])]; tensor var_6815_end_mask_0 = const()[name = string("op_6815_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6815_squeeze_mask_0 = const()[name = string("op_6815_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_6815_cast_fp16 = slice_by_index(begin = var_6815_begin_0, end = var_6815_end_0, end_mask = var_6815_end_mask_0, squeeze_mask = var_6815_squeeze_mask_0, x = coreml_update_state_41)[name = string("op_6815_cast_fp16")]; tensor K_cache_13_axes_0 = const()[name = string("K_cache_13_axes_0"), val = tensor([0])]; tensor K_cache_13_cast_fp16 = expand_dims(axes = K_cache_13_axes_0, x = var_6815_cast_fp16)[name = string("K_cache_13_cast_fp16")]; tensor var_6820_begin_0 = const()[name = string("op_6820_begin_0"), val = tensor([41, 0, 0, 0])]; tensor var_6820_end_0 = const()[name = string("op_6820_end_0"), val = tensor([42, 1, 512, 512])]; tensor var_6820_end_mask_0 = const()[name = string("op_6820_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_6820_squeeze_mask_0 = const()[name = string("op_6820_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_6820_cast_fp16 = slice_by_index(begin = var_6820_begin_0, end = var_6820_end_0, end_mask = var_6820_end_mask_0, squeeze_mask = var_6820_squeeze_mask_0, x = coreml_update_state_41)[name = string("op_6820_cast_fp16")]; tensor V_cache_13_axes_0 = const()[name = string("V_cache_13_axes_0"), val = tensor([0])]; tensor V_cache_13_cast_fp16 = expand_dims(axes = V_cache_13_axes_0, x = var_6820_cast_fp16)[name = string("V_cache_13_cast_fp16")]; tensor k_padded_11_pad_0 = const()[name = string("k_padded_11_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string k_padded_11_mode_0 = const()[name = string("k_padded_11_mode_0"), val = string("constant")]; fp16 const_188_to_fp16 = const()[name = string("const_188_to_fp16"), val = fp16(0x0p+0)]; tensor k_padded_11_cast_fp16 = pad(constant_val = const_188_to_fp16, mode = k_padded_11_mode_0, pad = k_padded_11_pad_0, x = input_257)[name = string("k_padded_11_cast_fp16")]; tensor v_padded_11_pad_0 = const()[name = string("v_padded_11_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string v_padded_11_mode_0 = const()[name = string("v_padded_11_mode_0"), val = string("constant")]; fp16 const_189_to_fp16 = const()[name = string("const_189_to_fp16"), val = fp16(0x0p+0)]; tensor v_padded_11_cast_fp16 = pad(constant_val = const_189_to_fp16, mode = v_padded_11_mode_0, pad = v_padded_11_pad_0, x = var_6797_cast_fp16)[name = string("v_padded_11_cast_fp16")]; tensor var_6838_cast_fp16 = mul(x = K_cache_13_cast_fp16, y = var_3515_cast_fp16)[name = string("op_6838_cast_fp16")]; tensor var_6839_reps_0 = const()[name = string("op_6839_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_6839_cast_fp16 = tile(reps = var_6839_reps_0, x = k_padded_11_cast_fp16)[name = string("op_6839_cast_fp16")]; tensor var_6840_cast_fp16 = mul(x = var_6839_cast_fp16, y = update_mask)[name = string("op_6840_cast_fp16")]; tensor K_new_13_cast_fp16 = add(x = var_6838_cast_fp16, y = var_6840_cast_fp16)[name = string("K_new_13_cast_fp16")]; tensor var_6846_cast_fp16 = mul(x = V_cache_13_cast_fp16, y = var_3515_cast_fp16)[name = string("op_6846_cast_fp16")]; tensor var_6847_reps_0 = const()[name = string("op_6847_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_6847_cast_fp16 = tile(reps = var_6847_reps_0, x = v_padded_11_cast_fp16)[name = string("op_6847_cast_fp16")]; tensor var_6848_cast_fp16 = mul(x = var_6847_cast_fp16, y = update_mask)[name = string("op_6848_cast_fp16")]; tensor V_new_13_cast_fp16 = add(x = var_6846_cast_fp16, y = var_6848_cast_fp16)[name = string("V_new_13_cast_fp16")]; tensor var_6852_axes_0 = const()[name = string("op_6852_axes_0"), val = tensor([0])]; tensor var_6852_cast_fp16 = squeeze(axes = var_6852_axes_0, x = K_new_13_cast_fp16)[name = string("op_6852_cast_fp16")]; tensor concat_48 = const()[name = string("concat_48"), val = tensor([6, 0, 0, 0])]; tensor concat_49 = const()[name = string("concat_49"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_48, begin_mask = kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_49, end_mask = kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_13_stride_0, update = var_6852_cast_fp16, x = coreml_update_state_41)[name = string("kv_cache_0_internal_tensor_assign_13_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_13_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_42_write_state")]; tensor coreml_update_state_42 = read_state(input = kv_cache_0)[name = string("coreml_update_state_42")]; tensor var_6859_axes_0 = const()[name = string("op_6859_axes_0"), val = tensor([0])]; tensor var_6859_cast_fp16 = squeeze(axes = var_6859_axes_0, x = V_new_13_cast_fp16)[name = string("op_6859_cast_fp16")]; tensor concat_50 = const()[name = string("concat_50"), val = tensor([41, 0, 0, 0])]; tensor concat_51 = const()[name = string("concat_51"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_50, begin_mask = kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_51, end_mask = kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_14_stride_0, update = var_6859_cast_fp16, x = coreml_update_state_42)[name = string("kv_cache_0_internal_tensor_assign_14_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_14_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_43_write_state")]; tensor coreml_update_state_43 = read_state(input = kv_cache_0)[name = string("coreml_update_state_43")]; tensor K_for_attn_13_begin_0 = const()[name = string("K_for_attn_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor K_for_attn_13_end_0 = const()[name = string("K_for_attn_13_end_0"), val = tensor([1, 1, 512, 256])]; tensor K_for_attn_13_end_mask_0 = const()[name = string("K_for_attn_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor K_for_attn_13_cast_fp16 = slice_by_index(begin = K_for_attn_13_begin_0, end = K_for_attn_13_end_0, end_mask = K_for_attn_13_end_mask_0, x = K_new_13_cast_fp16)[name = string("K_for_attn_13_cast_fp16")]; tensor V_for_attn_13_begin_0 = const()[name = string("V_for_attn_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor V_for_attn_13_end_0 = const()[name = string("V_for_attn_13_end_0"), val = tensor([1, 1, 512, 256])]; tensor V_for_attn_13_end_mask_0 = const()[name = string("V_for_attn_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor V_for_attn_13_cast_fp16 = slice_by_index(begin = V_for_attn_13_begin_0, end = V_for_attn_13_end_0, end_mask = V_for_attn_13_end_mask_0, x = V_new_13_cast_fp16)[name = string("V_for_attn_13_cast_fp16")]; tensor transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_12_reps_0 = const()[name = string("tile_12_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_24_cast_fp16 = transpose(perm = transpose_24_perm_0, x = K_for_attn_13_cast_fp16)[name = string("transpose_272")]; tensor tile_12_cast_fp16 = tile(reps = tile_12_reps_0, x = transpose_24_cast_fp16)[name = string("tile_12_cast_fp16")]; tensor concat_52 = const()[name = string("concat_52"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_24_cast_fp16 = reshape(shape = concat_52, x = tile_12_cast_fp16)[name = string("reshape_24_cast_fp16")]; tensor transpose_25_perm_0 = const()[name = string("transpose_25_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_53 = const()[name = string("concat_53"), val = tensor([-1, 1, 512, 256])]; tensor transpose_25_cast_fp16 = transpose(perm = transpose_25_perm_0, x = reshape_24_cast_fp16)[name = string("transpose_271")]; tensor reshape_25_cast_fp16 = reshape(shape = concat_53, x = transpose_25_cast_fp16)[name = string("reshape_25_cast_fp16")]; tensor transpose_146_perm_0 = const()[name = string("transpose_146_perm_0"), val = tensor([1, 0, -1, -2])]; tensor transpose_26_perm_0 = const()[name = string("transpose_26_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_13_reps_0 = const()[name = string("tile_13_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_26_cast_fp16 = transpose(perm = transpose_26_perm_0, x = V_for_attn_13_cast_fp16)[name = string("transpose_270")]; tensor tile_13_cast_fp16 = tile(reps = tile_13_reps_0, x = transpose_26_cast_fp16)[name = string("tile_13_cast_fp16")]; tensor concat_54 = const()[name = string("concat_54"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_26_cast_fp16 = reshape(shape = concat_54, x = tile_13_cast_fp16)[name = string("reshape_26_cast_fp16")]; tensor transpose_27_perm_0 = const()[name = string("transpose_27_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_55 = const()[name = string("concat_55"), val = tensor([-1, 1, 512, 256])]; tensor transpose_27_cast_fp16 = transpose(perm = transpose_27_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_269")]; tensor reshape_27_cast_fp16 = reshape(shape = concat_55, x = transpose_27_cast_fp16)[name = string("reshape_27_cast_fp16")]; tensor V_expanded_13_perm_0 = const()[name = string("V_expanded_13_perm_0"), val = tensor([1, 0, -2, -1])]; bool var_6896_transpose_x_0 = const()[name = string("op_6896_transpose_x_0"), val = bool(false)]; bool var_6896_transpose_y_0 = const()[name = string("op_6896_transpose_y_0"), val = bool(false)]; tensor transpose_146_cast_fp16 = transpose(perm = transpose_146_perm_0, x = reshape_25_cast_fp16)[name = string("transpose_268")]; tensor var_6896_cast_fp16 = matmul(transpose_x = var_6896_transpose_x_0, transpose_y = var_6896_transpose_y_0, x = q_55, y = transpose_146_cast_fp16)[name = string("op_6896_cast_fp16")]; tensor attn_weights_39_cast_fp16 = add(x = var_6896_cast_fp16, y = causal_mask)[name = string("attn_weights_39_cast_fp16")]; int32 var_6906 = const()[name = string("op_6906"), val = int32(-1)]; tensor var_6908_cast_fp16 = softmax(axis = var_6906, x = attn_weights_39_cast_fp16)[name = string("op_6908_cast_fp16")]; bool var_6924_transpose_x_0 = const()[name = string("op_6924_transpose_x_0"), val = bool(false)]; bool var_6924_transpose_y_0 = const()[name = string("op_6924_transpose_y_0"), val = bool(false)]; tensor V_expanded_13_cast_fp16 = transpose(perm = V_expanded_13_perm_0, x = reshape_27_cast_fp16)[name = string("transpose_267")]; tensor var_6924_cast_fp16 = matmul(transpose_x = var_6924_transpose_x_0, transpose_y = var_6924_transpose_y_0, x = var_6908_cast_fp16, y = V_expanded_13_cast_fp16)[name = string("op_6924_cast_fp16")]; tensor var_6934 = const()[name = string("op_6934"), val = tensor([0, 2, 1, 3])]; tensor var_6941 = const()[name = string("op_6941"), val = tensor([1, 1, -1])]; tensor var_6935 = transpose(perm = var_6934, x = var_6924_cast_fp16)[name = string("transpose_266")]; tensor attn_output_39 = reshape(shape = var_6941, x = var_6935)[name = string("attn_output_39")]; tensor var_6946 = const()[name = string("op_6946"), val = tensor([0, 2, 1])]; tensor squeeze_6_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2261471296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2263044224))))[name = string("squeeze_6_palettized")]; string var_6962_pad_type_0 = const()[name = string("op_6962_pad_type_0"), val = string("valid")]; int32 var_6962_groups_0 = const()[name = string("op_6962_groups_0"), val = int32(1)]; tensor var_6962_strides_0 = const()[name = string("op_6962_strides_0"), val = tensor([1])]; tensor var_6962_pad_0 = const()[name = string("op_6962_pad_0"), val = tensor([0, 0])]; tensor var_6962_dilations_0 = const()[name = string("op_6962_dilations_0"), val = tensor([1])]; tensor var_6947 = transpose(perm = var_6946, x = attn_output_39)[name = string("transpose_265")]; tensor var_6962 = conv(dilations = var_6962_dilations_0, groups = var_6962_groups_0, pad = var_6962_pad_0, pad_type = var_6962_pad_type_0, strides = var_6962_strides_0, weight = squeeze_6_palettized, x = var_6947)[name = string("op_6962")]; tensor var_6966 = const()[name = string("op_6966"), val = tensor([0, 2, 1])]; int32 var_6972 = const()[name = string("op_6972"), val = int32(-1)]; fp16 const_190_promoted_to_fp16 = const()[name = string("const_190_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_333 = transpose(perm = var_6966, x = var_6962)[name = string("transpose_264")]; tensor var_6978_cast_fp16 = mul(x = x_333, y = const_190_promoted_to_fp16)[name = string("op_6978_cast_fp16")]; bool input_263_interleave_0 = const()[name = string("input_263_interleave_0"), val = bool(false)]; tensor input_263_cast_fp16 = concat(axis = var_6972, interleave = input_263_interleave_0, values = (x_333, var_6978_cast_fp16))[name = string("input_263_cast_fp16")]; tensor normed_321_axes_0 = const()[name = string("normed_321_axes_0"), val = tensor([-1])]; fp16 var_6970_to_fp16 = const()[name = string("op_6970_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_321_cast_fp16 = layer_norm(axes = normed_321_axes_0, epsilon = var_6970_to_fp16, x = input_263_cast_fp16)[name = string("normed_321_cast_fp16")]; tensor var_6983_split_sizes_0 = const()[name = string("op_6983_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_6983_axis_0 = const()[name = string("op_6983_axis_0"), val = int32(-1)]; tensor var_6983_cast_fp16_0, tensor var_6983_cast_fp16_1 = split(axis = var_6983_axis_0, split_sizes = var_6983_split_sizes_0, x = normed_321_cast_fp16)[name = string("op_6983_cast_fp16")]; tensor const_191_to_fp16 = const()[name = string("const_191_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2263045824)))]; tensor var_6986_cast_fp16 = mul(x = var_6983_cast_fp16_0, y = const_191_to_fp16)[name = string("op_6986_cast_fp16")]; tensor x_337_cast_fp16 = add(x = x_319_cast_fp16, y = var_6986_cast_fp16)[name = string("x_337_cast_fp16")]; int32 var_6993 = const()[name = string("op_6993"), val = int32(-1)]; fp16 const_192_promoted_to_fp16 = const()[name = string("const_192_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6999_cast_fp16 = mul(x = x_337_cast_fp16, y = const_192_promoted_to_fp16)[name = string("op_6999_cast_fp16")]; bool input_265_interleave_0 = const()[name = string("input_265_interleave_0"), val = bool(false)]; tensor input_265_cast_fp16 = concat(axis = var_6993, interleave = input_265_interleave_0, values = (x_337_cast_fp16, var_6999_cast_fp16))[name = string("input_265_cast_fp16")]; tensor normed_325_axes_0 = const()[name = string("normed_325_axes_0"), val = tensor([-1])]; fp16 var_6991_to_fp16 = const()[name = string("op_6991_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_325_cast_fp16 = layer_norm(axes = normed_325_axes_0, epsilon = var_6991_to_fp16, x = input_265_cast_fp16)[name = string("normed_325_cast_fp16")]; tensor var_7004_split_sizes_0 = const()[name = string("op_7004_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_7004_axis_0 = const()[name = string("op_7004_axis_0"), val = int32(-1)]; tensor var_7004_cast_fp16_0, tensor var_7004_cast_fp16_1 = split(axis = var_7004_axis_0, split_sizes = var_7004_split_sizes_0, x = normed_325_cast_fp16)[name = string("op_7004_cast_fp16")]; tensor const_193_to_fp16 = const()[name = string("const_193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2263048960)))]; tensor var_7007_cast_fp16 = mul(x = var_7004_cast_fp16_0, y = const_193_to_fp16)[name = string("op_7007_cast_fp16")]; tensor var_7020 = const()[name = string("op_7020"), val = tensor([0, 2, 1])]; tensor input_267_axes_0 = const()[name = string("input_267_axes_0"), val = tensor([2])]; tensor var_7021 = transpose(perm = var_7020, x = var_7007_cast_fp16)[name = string("transpose_263")]; tensor input_267 = expand_dims(axes = input_267_axes_0, x = var_7021)[name = string("input_267")]; string gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")]; tensor gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor([1, 1])]; tensor gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor([1, 1])]; int32 gate_25_groups_0 = const()[name = string("gate_25_groups_0"), val = int32(1)]; tensor gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, strides = gate_25_strides_0, weight = layers_6_mlp_gate_proj_weight_palettized, x = input_267)[name = string("gate_25")]; string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")]; tensor up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor([1, 1])]; tensor up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor([1, 1])]; int32 up_13_groups_0 = const()[name = string("up_13_groups_0"), val = int32(1)]; tensor up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = layers_6_mlp_up_proj_weight_palettized, x = input_267)[name = string("up_13")]; string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")]; tensor input_269 = mul(x = gate_27, y = up_13)[name = string("input_269")]; string mlp_out_13_pad_type_0 = const()[name = string("mlp_out_13_pad_type_0"), val = string("valid")]; tensor mlp_out_13_strides_0 = const()[name = string("mlp_out_13_strides_0"), val = tensor([1, 1])]; tensor mlp_out_13_pad_0 = const()[name = string("mlp_out_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_13_dilations_0 = const()[name = string("mlp_out_13_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_13_groups_0 = const()[name = string("mlp_out_13_groups_0"), val = int32(1)]; tensor mlp_out_13 = conv(dilations = mlp_out_13_dilations_0, groups = mlp_out_13_groups_0, pad = mlp_out_13_pad_0, pad_type = mlp_out_13_pad_type_0, strides = mlp_out_13_strides_0, weight = layers_6_mlp_down_proj_weight_palettized, x = input_269)[name = string("mlp_out_13")]; tensor var_7061_axes_0 = const()[name = string("op_7061_axes_0"), val = tensor([2])]; tensor var_7061 = squeeze(axes = var_7061_axes_0, x = mlp_out_13)[name = string("op_7061")]; tensor var_7065 = const()[name = string("op_7065"), val = tensor([0, 2, 1])]; int32 var_7071 = const()[name = string("op_7071"), val = int32(-1)]; fp16 const_194_promoted_to_fp16 = const()[name = string("const_194_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_341 = transpose(perm = var_7065, x = var_7061)[name = string("transpose_262")]; tensor var_7077_cast_fp16 = mul(x = x_341, y = const_194_promoted_to_fp16)[name = string("op_7077_cast_fp16")]; bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; tensor input_271_cast_fp16 = concat(axis = var_7071, interleave = input_271_interleave_0, values = (x_341, var_7077_cast_fp16))[name = string("input_271_cast_fp16")]; tensor normed_329_axes_0 = const()[name = string("normed_329_axes_0"), val = tensor([-1])]; fp16 var_7069_to_fp16 = const()[name = string("op_7069_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_329_cast_fp16 = layer_norm(axes = normed_329_axes_0, epsilon = var_7069_to_fp16, x = input_271_cast_fp16)[name = string("normed_329_cast_fp16")]; tensor var_7082_split_sizes_0 = const()[name = string("op_7082_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_7082_axis_0 = const()[name = string("op_7082_axis_0"), val = int32(-1)]; tensor var_7082_cast_fp16_0, tensor var_7082_cast_fp16_1 = split(axis = var_7082_axis_0, split_sizes = var_7082_split_sizes_0, x = normed_329_cast_fp16)[name = string("op_7082_cast_fp16")]; tensor const_195_to_fp16 = const()[name = string("const_195_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2263052096)))]; tensor var_7085_cast_fp16 = mul(x = var_7082_cast_fp16_0, y = const_195_to_fp16)[name = string("op_7085_cast_fp16")]; tensor hidden_states_85_cast_fp16 = add(x = x_337_cast_fp16, y = var_7085_cast_fp16)[name = string("hidden_states_85_cast_fp16")]; tensor per_layer_slice_13_begin_0 = const()[name = string("per_layer_slice_13_begin_0"), val = tensor([0, 0, 1536])]; tensor per_layer_slice_13_end_0 = const()[name = string("per_layer_slice_13_end_0"), val = tensor([1, 1, 1792])]; tensor per_layer_slice_13_end_mask_0 = const()[name = string("per_layer_slice_13_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_13 = slice_by_index(begin = per_layer_slice_13_begin_0, end = per_layer_slice_13_end_0, end_mask = per_layer_slice_13_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_13")]; tensor gated_25 = linear(bias = linear_1_bias_0, weight = layers_6_per_layer_input_gate_weight_palettized, x = hidden_states_85_cast_fp16)[name = string("linear_13")]; string gated_27_mode_0 = const()[name = string("gated_27_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_27 = gelu(mode = gated_27_mode_0, x = gated_25)[name = string("gated_27")]; tensor input_275 = mul(x = gated_27, y = per_layer_slice_13)[name = string("input_275")]; tensor x_345 = linear(bias = linear_2_bias_0, weight = layers_6_per_layer_projection_weight_palettized, x = input_275)[name = string("linear_14")]; int32 var_7122 = const()[name = string("op_7122"), val = int32(-1)]; fp16 const_196_promoted_to_fp16 = const()[name = string("const_196_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7128_cast_fp16 = mul(x = x_345, y = const_196_promoted_to_fp16)[name = string("op_7128_cast_fp16")]; bool input_277_interleave_0 = const()[name = string("input_277_interleave_0"), val = bool(false)]; tensor input_277_cast_fp16 = concat(axis = var_7122, interleave = input_277_interleave_0, values = (x_345, var_7128_cast_fp16))[name = string("input_277_cast_fp16")]; tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; fp16 var_7120_to_fp16 = const()[name = string("op_7120_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_7120_to_fp16, x = input_277_cast_fp16)[name = string("normed_333_cast_fp16")]; tensor var_7133_split_sizes_0 = const()[name = string("op_7133_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_7133_axis_0 = const()[name = string("op_7133_axis_0"), val = int32(-1)]; tensor var_7133_cast_fp16_0, tensor var_7133_cast_fp16_1 = split(axis = var_7133_axis_0, split_sizes = var_7133_split_sizes_0, x = normed_333_cast_fp16)[name = string("op_7133_cast_fp16")]; tensor const_197_to_fp16 = const()[name = string("const_197_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2263055232)))]; tensor var_7136_cast_fp16 = mul(x = var_7133_cast_fp16_0, y = const_197_to_fp16)[name = string("op_7136_cast_fp16")]; tensor hidden_states_89_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = var_7136_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; tensor layers_6_layer_scalar_to_fp16 = const()[name = string("layers_6_layer_scalar_to_fp16"), val = tensor([0x1.fep-2])]; tensor x_349_cast_fp16 = mul(x = hidden_states_89_cast_fp16, y = layers_6_layer_scalar_to_fp16)[name = string("x_349_cast_fp16")]; int32 var_7144 = const()[name = string("op_7144"), val = int32(-1)]; fp16 const_198_promoted_to_fp16 = const()[name = string("const_198_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7150_cast_fp16 = mul(x = x_349_cast_fp16, y = const_198_promoted_to_fp16)[name = string("op_7150_cast_fp16")]; bool input_279_interleave_0 = const()[name = string("input_279_interleave_0"), val = bool(false)]; tensor input_279_cast_fp16 = concat(axis = var_7144, interleave = input_279_interleave_0, values = (x_349_cast_fp16, var_7150_cast_fp16))[name = string("input_279_cast_fp16")]; tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; fp16 var_7142_to_fp16 = const()[name = string("op_7142_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_7142_to_fp16, x = input_279_cast_fp16)[name = string("normed_337_cast_fp16")]; tensor var_7155_split_sizes_0 = const()[name = string("op_7155_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_7155_axis_0 = const()[name = string("op_7155_axis_0"), val = int32(-1)]; tensor var_7155_cast_fp16_0, tensor var_7155_cast_fp16_1 = split(axis = var_7155_axis_0, split_sizes = var_7155_split_sizes_0, x = normed_337_cast_fp16)[name = string("op_7155_cast_fp16")]; tensor const_199_to_fp16 = const()[name = string("const_199_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2263058368)))]; tensor var_7158_cast_fp16 = mul(x = var_7155_cast_fp16_0, y = const_199_to_fp16)[name = string("op_7158_cast_fp16")]; tensor var_7166 = const()[name = string("op_7166"), val = tensor([0, 2, 1])]; tensor var_7169_axes_0 = const()[name = string("op_7169_axes_0"), val = tensor([2])]; tensor var_7167_cast_fp16 = transpose(perm = var_7166, x = var_7158_cast_fp16)[name = string("transpose_261")]; tensor var_7169_cast_fp16 = expand_dims(axes = var_7169_axes_0, x = var_7167_cast_fp16)[name = string("op_7169_cast_fp16")]; string var_7185_pad_type_0 = const()[name = string("op_7185_pad_type_0"), val = string("valid")]; tensor var_7185_strides_0 = const()[name = string("op_7185_strides_0"), val = tensor([1, 1])]; tensor var_7185_pad_0 = const()[name = string("op_7185_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7185_dilations_0 = const()[name = string("op_7185_dilations_0"), val = tensor([1, 1])]; int32 var_7185_groups_0 = const()[name = string("op_7185_groups_0"), val = int32(1)]; tensor var_7185 = conv(dilations = var_7185_dilations_0, groups = var_7185_groups_0, pad = var_7185_pad_0, pad_type = var_7185_pad_type_0, strides = var_7185_strides_0, weight = layers_7_self_attn_q_proj_weight_palettized, x = var_7169_cast_fp16)[name = string("op_7185")]; tensor var_7190 = const()[name = string("op_7190"), val = tensor([1, 8, 256, 1])]; tensor var_7191 = reshape(shape = var_7190, x = var_7185)[name = string("op_7191")]; tensor var_7196 = const()[name = string("op_7196"), val = tensor([0, 1, 3, 2])]; tensor var_7206 = const()[name = string("op_7206"), val = tensor([1, 8, 256])]; tensor var_7197 = transpose(perm = var_7196, x = var_7191)[name = string("transpose_260")]; tensor x_353 = reshape(shape = var_7206, x = var_7197)[name = string("x_353")]; int32 var_7212 = const()[name = string("op_7212"), val = int32(-1)]; fp16 const_200_promoted_to_fp16 = const()[name = string("const_200_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7218_cast_fp16 = mul(x = x_353, y = const_200_promoted_to_fp16)[name = string("op_7218_cast_fp16")]; bool input_283_interleave_0 = const()[name = string("input_283_interleave_0"), val = bool(false)]; tensor input_283_cast_fp16 = concat(axis = var_7212, interleave = input_283_interleave_0, values = (x_353, var_7218_cast_fp16))[name = string("input_283_cast_fp16")]; tensor normed_341_axes_0 = const()[name = string("normed_341_axes_0"), val = tensor([-1])]; fp16 var_7210_to_fp16 = const()[name = string("op_7210_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_341_cast_fp16 = layer_norm(axes = normed_341_axes_0, epsilon = var_7210_to_fp16, x = input_283_cast_fp16)[name = string("normed_341_cast_fp16")]; tensor var_7223_split_sizes_0 = const()[name = string("op_7223_split_sizes_0"), val = tensor([256, 256])]; int32 var_7223_axis_0 = const()[name = string("op_7223_axis_0"), val = int32(-1)]; tensor var_7223_cast_fp16_0, tensor var_7223_cast_fp16_1 = split(axis = var_7223_axis_0, split_sizes = var_7223_split_sizes_0, x = normed_341_cast_fp16)[name = string("op_7223_cast_fp16")]; tensor const_201_to_fp16 = const()[name = string("const_201_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2263061504)))]; tensor var_7226_cast_fp16 = mul(x = var_7223_cast_fp16_0, y = const_201_to_fp16)[name = string("op_7226_cast_fp16")]; tensor var_7232 = const()[name = string("op_7232"), val = tensor([1, 8, 1, 256])]; tensor q_59 = reshape(shape = var_7232, x = var_7226_cast_fp16)[name = string("q_59")]; tensor var_7234 = mul(x = q_59, y = cos_1)[name = string("op_7234")]; tensor var_7235_split_sizes_0 = const()[name = string("op_7235_split_sizes_0"), val = tensor([128, 128])]; int32 var_7235_axis_0 = const()[name = string("op_7235_axis_0"), val = int32(-1)]; tensor var_7235_0, tensor var_7235_1 = split(axis = var_7235_axis_0, split_sizes = var_7235_split_sizes_0, x = q_59)[name = string("op_7235")]; fp16 const_202_promoted = const()[name = string("const_202_promoted"), val = fp16(-0x1p+0)]; tensor var_7237 = mul(x = var_7235_1, y = const_202_promoted)[name = string("op_7237")]; int32 var_7239 = const()[name = string("op_7239"), val = int32(-1)]; bool var_7240_interleave_0 = const()[name = string("op_7240_interleave_0"), val = bool(false)]; tensor var_7240 = concat(axis = var_7239, interleave = var_7240_interleave_0, values = (var_7237, var_7235_0))[name = string("op_7240")]; tensor var_7241 = mul(x = var_7240, y = sin_1)[name = string("op_7241")]; tensor q_63 = add(x = var_7234, y = var_7241)[name = string("q_63")]; string var_7254_pad_type_0 = const()[name = string("op_7254_pad_type_0"), val = string("valid")]; tensor var_7254_strides_0 = const()[name = string("op_7254_strides_0"), val = tensor([1, 1])]; tensor var_7254_pad_0 = const()[name = string("op_7254_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7254_dilations_0 = const()[name = string("op_7254_dilations_0"), val = tensor([1, 1])]; int32 var_7254_groups_0 = const()[name = string("op_7254_groups_0"), val = int32(1)]; tensor var_7254 = conv(dilations = var_7254_dilations_0, groups = var_7254_groups_0, pad = var_7254_pad_0, pad_type = var_7254_pad_type_0, strides = var_7254_strides_0, weight = layers_7_self_attn_k_proj_weight_palettized, x = var_7169_cast_fp16)[name = string("op_7254")]; tensor var_7259 = const()[name = string("op_7259"), val = tensor([1, 1, 256, 1])]; tensor var_7260 = reshape(shape = var_7259, x = var_7254)[name = string("op_7260")]; tensor var_7265 = const()[name = string("op_7265"), val = tensor([0, 1, 3, 2])]; string var_7282_pad_type_0 = const()[name = string("op_7282_pad_type_0"), val = string("valid")]; tensor var_7282_strides_0 = const()[name = string("op_7282_strides_0"), val = tensor([1, 1])]; tensor var_7282_pad_0 = const()[name = string("op_7282_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7282_dilations_0 = const()[name = string("op_7282_dilations_0"), val = tensor([1, 1])]; int32 var_7282_groups_0 = const()[name = string("op_7282_groups_0"), val = int32(1)]; tensor var_7282 = conv(dilations = var_7282_dilations_0, groups = var_7282_groups_0, pad = var_7282_pad_0, pad_type = var_7282_pad_type_0, strides = var_7282_strides_0, weight = layers_7_self_attn_v_proj_weight_palettized, x = var_7169_cast_fp16)[name = string("op_7282")]; tensor var_7287 = const()[name = string("op_7287"), val = tensor([1, 1, 256, 1])]; tensor var_7288 = reshape(shape = var_7287, x = var_7282)[name = string("op_7288")]; tensor var_7293 = const()[name = string("op_7293"), val = tensor([0, 1, 3, 2])]; tensor var_7303 = const()[name = string("op_7303"), val = tensor([1, 1, 256])]; tensor var_7266 = transpose(perm = var_7265, x = var_7260)[name = string("transpose_259")]; tensor x_357 = reshape(shape = var_7303, x = var_7266)[name = string("x_357")]; int32 var_7309 = const()[name = string("op_7309"), val = int32(-1)]; fp16 const_203_promoted_to_fp16 = const()[name = string("const_203_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7315_cast_fp16 = mul(x = x_357, y = const_203_promoted_to_fp16)[name = string("op_7315_cast_fp16")]; bool input_285_interleave_0 = const()[name = string("input_285_interleave_0"), val = bool(false)]; tensor input_285_cast_fp16 = concat(axis = var_7309, interleave = input_285_interleave_0, values = (x_357, var_7315_cast_fp16))[name = string("input_285_cast_fp16")]; tensor normed_345_axes_0 = const()[name = string("normed_345_axes_0"), val = tensor([-1])]; fp16 var_7307_to_fp16 = const()[name = string("op_7307_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_345_cast_fp16 = layer_norm(axes = normed_345_axes_0, epsilon = var_7307_to_fp16, x = input_285_cast_fp16)[name = string("normed_345_cast_fp16")]; tensor var_7320_split_sizes_0 = const()[name = string("op_7320_split_sizes_0"), val = tensor([256, 256])]; int32 var_7320_axis_0 = const()[name = string("op_7320_axis_0"), val = int32(-1)]; tensor var_7320_cast_fp16_0, tensor var_7320_cast_fp16_1 = split(axis = var_7320_axis_0, split_sizes = var_7320_split_sizes_0, x = normed_345_cast_fp16)[name = string("op_7320_cast_fp16")]; tensor const_204_to_fp16 = const()[name = string("const_204_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2263062080)))]; tensor var_7323_cast_fp16 = mul(x = var_7320_cast_fp16_0, y = const_204_to_fp16)[name = string("op_7323_cast_fp16")]; tensor var_7329 = const()[name = string("op_7329"), val = tensor([1, 1, 1, 256])]; tensor q_61 = reshape(shape = var_7329, x = var_7323_cast_fp16)[name = string("q_61")]; fp16 var_7336_promoted_to_fp16 = const()[name = string("op_7336_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7294 = transpose(perm = var_7293, x = var_7288)[name = string("transpose_258")]; tensor var_7337_cast_fp16 = pow(x = var_7294, y = var_7336_promoted_to_fp16)[name = string("op_7337_cast_fp16")]; tensor var_7342_axes_0 = const()[name = string("op_7342_axes_0"), val = tensor([-1])]; bool var_7342_keep_dims_0 = const()[name = string("op_7342_keep_dims_0"), val = bool(true)]; tensor var_7342_cast_fp16 = reduce_mean(axes = var_7342_axes_0, keep_dims = var_7342_keep_dims_0, x = var_7337_cast_fp16)[name = string("op_7342_cast_fp16")]; fp16 var_7344_to_fp16 = const()[name = string("op_7344_to_fp16"), val = fp16(0x1.1p-20)]; tensor mean_sq_15_cast_fp16 = add(x = var_7342_cast_fp16, y = var_7344_to_fp16)[name = string("mean_sq_15_cast_fp16")]; fp16 var_7351_to_fp16 = const()[name = string("op_7351_to_fp16"), val = fp16(-0x1p-1)]; tensor var_7352_cast_fp16 = pow(x = mean_sq_15_cast_fp16, y = var_7351_to_fp16)[name = string("op_7352_cast_fp16")]; tensor var_7353_cast_fp16 = mul(x = var_7294, y = var_7352_cast_fp16)[name = string("op_7353_cast_fp16")]; tensor var_7359 = mul(x = q_61, y = cos_1)[name = string("op_7359")]; tensor var_7360_split_sizes_0 = const()[name = string("op_7360_split_sizes_0"), val = tensor([128, 128])]; int32 var_7360_axis_0 = const()[name = string("op_7360_axis_0"), val = int32(-1)]; tensor var_7360_0, tensor var_7360_1 = split(axis = var_7360_axis_0, split_sizes = var_7360_split_sizes_0, x = q_61)[name = string("op_7360")]; fp16 const_205_promoted = const()[name = string("const_205_promoted"), val = fp16(-0x1p+0)]; tensor var_7362 = mul(x = var_7360_1, y = const_205_promoted)[name = string("op_7362")]; int32 var_7364 = const()[name = string("op_7364"), val = int32(-1)]; bool var_7365_interleave_0 = const()[name = string("op_7365_interleave_0"), val = bool(false)]; tensor var_7365 = concat(axis = var_7364, interleave = var_7365_interleave_0, values = (var_7362, var_7360_0))[name = string("op_7365")]; tensor var_7366 = mul(x = var_7365, y = sin_1)[name = string("op_7366")]; tensor input_287 = add(x = var_7359, y = var_7366)[name = string("input_287")]; tensor var_7371_begin_0 = const()[name = string("op_7371_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_7371_end_0 = const()[name = string("op_7371_end_0"), val = tensor([8, 1, 512, 512])]; tensor var_7371_end_mask_0 = const()[name = string("op_7371_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7371_squeeze_mask_0 = const()[name = string("op_7371_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_7371_cast_fp16 = slice_by_index(begin = var_7371_begin_0, end = var_7371_end_0, end_mask = var_7371_end_mask_0, squeeze_mask = var_7371_squeeze_mask_0, x = coreml_update_state_43)[name = string("op_7371_cast_fp16")]; tensor K_cache_15_axes_0 = const()[name = string("K_cache_15_axes_0"), val = tensor([0])]; tensor K_cache_15_cast_fp16 = expand_dims(axes = K_cache_15_axes_0, x = var_7371_cast_fp16)[name = string("K_cache_15_cast_fp16")]; tensor var_7376_begin_0 = const()[name = string("op_7376_begin_0"), val = tensor([42, 0, 0, 0])]; tensor var_7376_end_0 = const()[name = string("op_7376_end_0"), val = tensor([43, 1, 512, 512])]; tensor var_7376_end_mask_0 = const()[name = string("op_7376_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7376_squeeze_mask_0 = const()[name = string("op_7376_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_7376_cast_fp16 = slice_by_index(begin = var_7376_begin_0, end = var_7376_end_0, end_mask = var_7376_end_mask_0, squeeze_mask = var_7376_squeeze_mask_0, x = coreml_update_state_43)[name = string("op_7376_cast_fp16")]; tensor V_cache_15_axes_0 = const()[name = string("V_cache_15_axes_0"), val = tensor([0])]; tensor V_cache_15_cast_fp16 = expand_dims(axes = V_cache_15_axes_0, x = var_7376_cast_fp16)[name = string("V_cache_15_cast_fp16")]; tensor k_padded_13_pad_0 = const()[name = string("k_padded_13_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string k_padded_13_mode_0 = const()[name = string("k_padded_13_mode_0"), val = string("constant")]; fp16 const_206_to_fp16 = const()[name = string("const_206_to_fp16"), val = fp16(0x0p+0)]; tensor k_padded_13_cast_fp16 = pad(constant_val = const_206_to_fp16, mode = k_padded_13_mode_0, pad = k_padded_13_pad_0, x = input_287)[name = string("k_padded_13_cast_fp16")]; tensor v_padded_13_pad_0 = const()[name = string("v_padded_13_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string v_padded_13_mode_0 = const()[name = string("v_padded_13_mode_0"), val = string("constant")]; fp16 const_207_to_fp16 = const()[name = string("const_207_to_fp16"), val = fp16(0x0p+0)]; tensor v_padded_13_cast_fp16 = pad(constant_val = const_207_to_fp16, mode = v_padded_13_mode_0, pad = v_padded_13_pad_0, x = var_7353_cast_fp16)[name = string("v_padded_13_cast_fp16")]; tensor var_7394_cast_fp16 = mul(x = K_cache_15_cast_fp16, y = var_3515_cast_fp16)[name = string("op_7394_cast_fp16")]; tensor var_7395_reps_0 = const()[name = string("op_7395_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_7395_cast_fp16 = tile(reps = var_7395_reps_0, x = k_padded_13_cast_fp16)[name = string("op_7395_cast_fp16")]; tensor var_7396_cast_fp16 = mul(x = var_7395_cast_fp16, y = update_mask)[name = string("op_7396_cast_fp16")]; tensor K_new_15_cast_fp16 = add(x = var_7394_cast_fp16, y = var_7396_cast_fp16)[name = string("K_new_15_cast_fp16")]; tensor var_7402_cast_fp16 = mul(x = V_cache_15_cast_fp16, y = var_3515_cast_fp16)[name = string("op_7402_cast_fp16")]; tensor var_7403_reps_0 = const()[name = string("op_7403_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_7403_cast_fp16 = tile(reps = var_7403_reps_0, x = v_padded_13_cast_fp16)[name = string("op_7403_cast_fp16")]; tensor var_7404_cast_fp16 = mul(x = var_7403_cast_fp16, y = update_mask)[name = string("op_7404_cast_fp16")]; tensor V_new_15_cast_fp16 = add(x = var_7402_cast_fp16, y = var_7404_cast_fp16)[name = string("V_new_15_cast_fp16")]; tensor var_7408_axes_0 = const()[name = string("op_7408_axes_0"), val = tensor([0])]; tensor var_7408_cast_fp16 = squeeze(axes = var_7408_axes_0, x = K_new_15_cast_fp16)[name = string("op_7408_cast_fp16")]; tensor concat_56 = const()[name = string("concat_56"), val = tensor([7, 0, 0, 0])]; tensor concat_57 = const()[name = string("concat_57"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_56, begin_mask = kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_57, end_mask = kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_15_stride_0, update = var_7408_cast_fp16, x = coreml_update_state_43)[name = string("kv_cache_0_internal_tensor_assign_15_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_15_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_44_write_state")]; tensor coreml_update_state_44 = read_state(input = kv_cache_0)[name = string("coreml_update_state_44")]; tensor var_7415_axes_0 = const()[name = string("op_7415_axes_0"), val = tensor([0])]; tensor var_7415_cast_fp16 = squeeze(axes = var_7415_axes_0, x = V_new_15_cast_fp16)[name = string("op_7415_cast_fp16")]; tensor concat_58 = const()[name = string("concat_58"), val = tensor([42, 0, 0, 0])]; tensor concat_59 = const()[name = string("concat_59"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_58, begin_mask = kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_59, end_mask = kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_16_stride_0, update = var_7415_cast_fp16, x = coreml_update_state_44)[name = string("kv_cache_0_internal_tensor_assign_16_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_16_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_45_write_state")]; tensor coreml_update_state_45 = read_state(input = kv_cache_0)[name = string("coreml_update_state_45")]; tensor K_for_attn_15_begin_0 = const()[name = string("K_for_attn_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor K_for_attn_15_end_0 = const()[name = string("K_for_attn_15_end_0"), val = tensor([1, 1, 512, 256])]; tensor K_for_attn_15_end_mask_0 = const()[name = string("K_for_attn_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor K_for_attn_15_cast_fp16 = slice_by_index(begin = K_for_attn_15_begin_0, end = K_for_attn_15_end_0, end_mask = K_for_attn_15_end_mask_0, x = K_new_15_cast_fp16)[name = string("K_for_attn_15_cast_fp16")]; tensor V_for_attn_15_begin_0 = const()[name = string("V_for_attn_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor V_for_attn_15_end_0 = const()[name = string("V_for_attn_15_end_0"), val = tensor([1, 1, 512, 256])]; tensor V_for_attn_15_end_mask_0 = const()[name = string("V_for_attn_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor V_for_attn_15_cast_fp16 = slice_by_index(begin = V_for_attn_15_begin_0, end = V_for_attn_15_end_0, end_mask = V_for_attn_15_end_mask_0, x = V_new_15_cast_fp16)[name = string("V_for_attn_15_cast_fp16")]; tensor transpose_28_perm_0 = const()[name = string("transpose_28_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_14_reps_0 = const()[name = string("tile_14_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_28_cast_fp16 = transpose(perm = transpose_28_perm_0, x = K_for_attn_15_cast_fp16)[name = string("transpose_257")]; tensor tile_14_cast_fp16 = tile(reps = tile_14_reps_0, x = transpose_28_cast_fp16)[name = string("tile_14_cast_fp16")]; tensor concat_60 = const()[name = string("concat_60"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_28_cast_fp16 = reshape(shape = concat_60, x = tile_14_cast_fp16)[name = string("reshape_28_cast_fp16")]; tensor transpose_29_perm_0 = const()[name = string("transpose_29_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_61 = const()[name = string("concat_61"), val = tensor([-1, 1, 512, 256])]; tensor transpose_29_cast_fp16 = transpose(perm = transpose_29_perm_0, x = reshape_28_cast_fp16)[name = string("transpose_256")]; tensor reshape_29_cast_fp16 = reshape(shape = concat_61, x = transpose_29_cast_fp16)[name = string("reshape_29_cast_fp16")]; tensor transpose_147_perm_0 = const()[name = string("transpose_147_perm_0"), val = tensor([1, 0, -1, -2])]; tensor transpose_30_perm_0 = const()[name = string("transpose_30_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_15_reps_0 = const()[name = string("tile_15_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_30_cast_fp16 = transpose(perm = transpose_30_perm_0, x = V_for_attn_15_cast_fp16)[name = string("transpose_255")]; tensor tile_15_cast_fp16 = tile(reps = tile_15_reps_0, x = transpose_30_cast_fp16)[name = string("tile_15_cast_fp16")]; tensor concat_62 = const()[name = string("concat_62"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_30_cast_fp16 = reshape(shape = concat_62, x = tile_15_cast_fp16)[name = string("reshape_30_cast_fp16")]; tensor transpose_31_perm_0 = const()[name = string("transpose_31_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_63 = const()[name = string("concat_63"), val = tensor([-1, 1, 512, 256])]; tensor transpose_31_cast_fp16 = transpose(perm = transpose_31_perm_0, x = reshape_30_cast_fp16)[name = string("transpose_254")]; tensor reshape_31_cast_fp16 = reshape(shape = concat_63, x = transpose_31_cast_fp16)[name = string("reshape_31_cast_fp16")]; tensor V_expanded_15_perm_0 = const()[name = string("V_expanded_15_perm_0"), val = tensor([1, 0, -2, -1])]; bool var_7452_transpose_x_0 = const()[name = string("op_7452_transpose_x_0"), val = bool(false)]; bool var_7452_transpose_y_0 = const()[name = string("op_7452_transpose_y_0"), val = bool(false)]; tensor transpose_147_cast_fp16 = transpose(perm = transpose_147_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_253")]; tensor var_7452_cast_fp16 = matmul(transpose_x = var_7452_transpose_x_0, transpose_y = var_7452_transpose_y_0, x = q_63, y = transpose_147_cast_fp16)[name = string("op_7452_cast_fp16")]; tensor attn_weights_45_cast_fp16 = add(x = var_7452_cast_fp16, y = causal_mask)[name = string("attn_weights_45_cast_fp16")]; int32 var_7462 = const()[name = string("op_7462"), val = int32(-1)]; tensor var_7464_cast_fp16 = softmax(axis = var_7462, x = attn_weights_45_cast_fp16)[name = string("op_7464_cast_fp16")]; bool var_7480_transpose_x_0 = const()[name = string("op_7480_transpose_x_0"), val = bool(false)]; bool var_7480_transpose_y_0 = const()[name = string("op_7480_transpose_y_0"), val = bool(false)]; tensor V_expanded_15_cast_fp16 = transpose(perm = V_expanded_15_perm_0, x = reshape_31_cast_fp16)[name = string("transpose_252")]; tensor var_7480_cast_fp16 = matmul(transpose_x = var_7480_transpose_x_0, transpose_y = var_7480_transpose_y_0, x = var_7464_cast_fp16, y = V_expanded_15_cast_fp16)[name = string("op_7480_cast_fp16")]; tensor var_7490 = const()[name = string("op_7490"), val = tensor([0, 2, 1, 3])]; tensor var_7497 = const()[name = string("op_7497"), val = tensor([1, 1, -1])]; tensor var_7491 = transpose(perm = var_7490, x = var_7480_cast_fp16)[name = string("transpose_251")]; tensor attn_output_45 = reshape(shape = var_7497, x = var_7491)[name = string("attn_output_45")]; tensor var_7502 = const()[name = string("op_7502"), val = tensor([0, 2, 1])]; tensor squeeze_7_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2263062656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2264635584))))[name = string("squeeze_7_palettized")]; string var_7518_pad_type_0 = const()[name = string("op_7518_pad_type_0"), val = string("valid")]; int32 var_7518_groups_0 = const()[name = string("op_7518_groups_0"), val = int32(1)]; tensor var_7518_strides_0 = const()[name = string("op_7518_strides_0"), val = tensor([1])]; tensor var_7518_pad_0 = const()[name = string("op_7518_pad_0"), val = tensor([0, 0])]; tensor var_7518_dilations_0 = const()[name = string("op_7518_dilations_0"), val = tensor([1])]; tensor var_7503 = transpose(perm = var_7502, x = attn_output_45)[name = string("transpose_250")]; tensor var_7518 = conv(dilations = var_7518_dilations_0, groups = var_7518_groups_0, pad = var_7518_pad_0, pad_type = var_7518_pad_type_0, strides = var_7518_strides_0, weight = squeeze_7_palettized, x = var_7503)[name = string("op_7518")]; tensor var_7522 = const()[name = string("op_7522"), val = tensor([0, 2, 1])]; int32 var_7528 = const()[name = string("op_7528"), val = int32(-1)]; fp16 const_208_promoted_to_fp16 = const()[name = string("const_208_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_363 = transpose(perm = var_7522, x = var_7518)[name = string("transpose_249")]; tensor var_7534_cast_fp16 = mul(x = x_363, y = const_208_promoted_to_fp16)[name = string("op_7534_cast_fp16")]; bool input_293_interleave_0 = const()[name = string("input_293_interleave_0"), val = bool(false)]; tensor input_293_cast_fp16 = concat(axis = var_7528, interleave = input_293_interleave_0, values = (x_363, var_7534_cast_fp16))[name = string("input_293_cast_fp16")]; tensor normed_349_axes_0 = const()[name = string("normed_349_axes_0"), val = tensor([-1])]; fp16 var_7526_to_fp16 = const()[name = string("op_7526_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_349_cast_fp16 = layer_norm(axes = normed_349_axes_0, epsilon = var_7526_to_fp16, x = input_293_cast_fp16)[name = string("normed_349_cast_fp16")]; tensor var_7539_split_sizes_0 = const()[name = string("op_7539_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_7539_axis_0 = const()[name = string("op_7539_axis_0"), val = int32(-1)]; tensor var_7539_cast_fp16_0, tensor var_7539_cast_fp16_1 = split(axis = var_7539_axis_0, split_sizes = var_7539_split_sizes_0, x = normed_349_cast_fp16)[name = string("op_7539_cast_fp16")]; tensor const_209_to_fp16 = const()[name = string("const_209_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2264637184)))]; tensor var_7542_cast_fp16 = mul(x = var_7539_cast_fp16_0, y = const_209_to_fp16)[name = string("op_7542_cast_fp16")]; tensor x_367_cast_fp16 = add(x = x_349_cast_fp16, y = var_7542_cast_fp16)[name = string("x_367_cast_fp16")]; int32 var_7549 = const()[name = string("op_7549"), val = int32(-1)]; fp16 const_210_promoted_to_fp16 = const()[name = string("const_210_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7555_cast_fp16 = mul(x = x_367_cast_fp16, y = const_210_promoted_to_fp16)[name = string("op_7555_cast_fp16")]; bool input_295_interleave_0 = const()[name = string("input_295_interleave_0"), val = bool(false)]; tensor input_295_cast_fp16 = concat(axis = var_7549, interleave = input_295_interleave_0, values = (x_367_cast_fp16, var_7555_cast_fp16))[name = string("input_295_cast_fp16")]; tensor normed_353_axes_0 = const()[name = string("normed_353_axes_0"), val = tensor([-1])]; fp16 var_7547_to_fp16 = const()[name = string("op_7547_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_353_cast_fp16 = layer_norm(axes = normed_353_axes_0, epsilon = var_7547_to_fp16, x = input_295_cast_fp16)[name = string("normed_353_cast_fp16")]; tensor var_7560_split_sizes_0 = const()[name = string("op_7560_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_7560_axis_0 = const()[name = string("op_7560_axis_0"), val = int32(-1)]; tensor var_7560_cast_fp16_0, tensor var_7560_cast_fp16_1 = split(axis = var_7560_axis_0, split_sizes = var_7560_split_sizes_0, x = normed_353_cast_fp16)[name = string("op_7560_cast_fp16")]; tensor const_211_to_fp16 = const()[name = string("const_211_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2264640320)))]; tensor var_7563_cast_fp16 = mul(x = var_7560_cast_fp16_0, y = const_211_to_fp16)[name = string("op_7563_cast_fp16")]; tensor var_7576 = const()[name = string("op_7576"), val = tensor([0, 2, 1])]; tensor input_297_axes_0 = const()[name = string("input_297_axes_0"), val = tensor([2])]; tensor var_7577 = transpose(perm = var_7576, x = var_7563_cast_fp16)[name = string("transpose_248")]; tensor input_297 = expand_dims(axes = input_297_axes_0, x = var_7577)[name = string("input_297")]; string gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")]; tensor gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor([1, 1])]; tensor gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val = tensor([1, 1])]; int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)]; tensor gate_29 = conv(dilations = gate_29_dilations_0, groups = gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = layers_7_mlp_gate_proj_weight_palettized, x = input_297)[name = string("gate_29")]; string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")]; tensor up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor([1, 1])]; tensor up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor([1, 1])]; int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)]; tensor up_15 = conv(dilations = up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = layers_7_mlp_up_proj_weight_palettized, x = input_297)[name = string("up_15")]; string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")]; tensor input_299 = mul(x = gate_31, y = up_15)[name = string("input_299")]; string mlp_out_15_pad_type_0 = const()[name = string("mlp_out_15_pad_type_0"), val = string("valid")]; tensor mlp_out_15_strides_0 = const()[name = string("mlp_out_15_strides_0"), val = tensor([1, 1])]; tensor mlp_out_15_pad_0 = const()[name = string("mlp_out_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_15_dilations_0 = const()[name = string("mlp_out_15_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_15_groups_0 = const()[name = string("mlp_out_15_groups_0"), val = int32(1)]; tensor mlp_out_15 = conv(dilations = mlp_out_15_dilations_0, groups = mlp_out_15_groups_0, pad = mlp_out_15_pad_0, pad_type = mlp_out_15_pad_type_0, strides = mlp_out_15_strides_0, weight = layers_7_mlp_down_proj_weight_palettized, x = input_299)[name = string("mlp_out_15")]; tensor var_7617_axes_0 = const()[name = string("op_7617_axes_0"), val = tensor([2])]; tensor var_7617 = squeeze(axes = var_7617_axes_0, x = mlp_out_15)[name = string("op_7617")]; tensor var_7621 = const()[name = string("op_7621"), val = tensor([0, 2, 1])]; int32 var_7627 = const()[name = string("op_7627"), val = int32(-1)]; fp16 const_212_promoted_to_fp16 = const()[name = string("const_212_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_371 = transpose(perm = var_7621, x = var_7617)[name = string("transpose_247")]; tensor var_7633_cast_fp16 = mul(x = x_371, y = const_212_promoted_to_fp16)[name = string("op_7633_cast_fp16")]; bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; tensor input_301_cast_fp16 = concat(axis = var_7627, interleave = input_301_interleave_0, values = (x_371, var_7633_cast_fp16))[name = string("input_301_cast_fp16")]; tensor normed_357_axes_0 = const()[name = string("normed_357_axes_0"), val = tensor([-1])]; fp16 var_7625_to_fp16 = const()[name = string("op_7625_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_357_cast_fp16 = layer_norm(axes = normed_357_axes_0, epsilon = var_7625_to_fp16, x = input_301_cast_fp16)[name = string("normed_357_cast_fp16")]; tensor var_7638_split_sizes_0 = const()[name = string("op_7638_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_7638_axis_0 = const()[name = string("op_7638_axis_0"), val = int32(-1)]; tensor var_7638_cast_fp16_0, tensor var_7638_cast_fp16_1 = split(axis = var_7638_axis_0, split_sizes = var_7638_split_sizes_0, x = normed_357_cast_fp16)[name = string("op_7638_cast_fp16")]; tensor const_213_to_fp16 = const()[name = string("const_213_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2264643456)))]; tensor var_7641_cast_fp16 = mul(x = var_7638_cast_fp16_0, y = const_213_to_fp16)[name = string("op_7641_cast_fp16")]; tensor hidden_states_97_cast_fp16 = add(x = x_367_cast_fp16, y = var_7641_cast_fp16)[name = string("hidden_states_97_cast_fp16")]; tensor per_layer_slice_15_begin_0 = const()[name = string("per_layer_slice_15_begin_0"), val = tensor([0, 0, 1792])]; tensor per_layer_slice_15_end_0 = const()[name = string("per_layer_slice_15_end_0"), val = tensor([1, 1, 2048])]; tensor per_layer_slice_15_end_mask_0 = const()[name = string("per_layer_slice_15_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_15 = slice_by_index(begin = per_layer_slice_15_begin_0, end = per_layer_slice_15_end_0, end_mask = per_layer_slice_15_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_15")]; tensor gated_29 = linear(bias = linear_1_bias_0, weight = layers_7_per_layer_input_gate_weight_palettized, x = hidden_states_97_cast_fp16)[name = string("linear_15")]; string gated_31_mode_0 = const()[name = string("gated_31_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_31 = gelu(mode = gated_31_mode_0, x = gated_29)[name = string("gated_31")]; tensor input_305 = mul(x = gated_31, y = per_layer_slice_15)[name = string("input_305")]; tensor x_375 = linear(bias = linear_2_bias_0, weight = layers_7_per_layer_projection_weight_palettized, x = input_305)[name = string("linear_16")]; int32 var_7678 = const()[name = string("op_7678"), val = int32(-1)]; fp16 const_214_promoted_to_fp16 = const()[name = string("const_214_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7684_cast_fp16 = mul(x = x_375, y = const_214_promoted_to_fp16)[name = string("op_7684_cast_fp16")]; bool input_307_interleave_0 = const()[name = string("input_307_interleave_0"), val = bool(false)]; tensor input_307_cast_fp16 = concat(axis = var_7678, interleave = input_307_interleave_0, values = (x_375, var_7684_cast_fp16))[name = string("input_307_cast_fp16")]; tensor normed_361_axes_0 = const()[name = string("normed_361_axes_0"), val = tensor([-1])]; fp16 var_7676_to_fp16 = const()[name = string("op_7676_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_361_cast_fp16 = layer_norm(axes = normed_361_axes_0, epsilon = var_7676_to_fp16, x = input_307_cast_fp16)[name = string("normed_361_cast_fp16")]; tensor var_7689_split_sizes_0 = const()[name = string("op_7689_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_7689_axis_0 = const()[name = string("op_7689_axis_0"), val = int32(-1)]; tensor var_7689_cast_fp16_0, tensor var_7689_cast_fp16_1 = split(axis = var_7689_axis_0, split_sizes = var_7689_split_sizes_0, x = normed_361_cast_fp16)[name = string("op_7689_cast_fp16")]; tensor const_215_to_fp16 = const()[name = string("const_215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2264646592)))]; tensor var_7692_cast_fp16 = mul(x = var_7689_cast_fp16_0, y = const_215_to_fp16)[name = string("op_7692_cast_fp16")]; tensor hidden_states_101_cast_fp16 = add(x = hidden_states_97_cast_fp16, y = var_7692_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor layers_7_layer_scalar_to_fp16 = const()[name = string("layers_7_layer_scalar_to_fp16"), val = tensor([0x1.38p-1])]; tensor x_379_cast_fp16 = mul(x = hidden_states_101_cast_fp16, y = layers_7_layer_scalar_to_fp16)[name = string("x_379_cast_fp16")]; int32 var_7700 = const()[name = string("op_7700"), val = int32(-1)]; fp16 const_216_promoted_to_fp16 = const()[name = string("const_216_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7706_cast_fp16 = mul(x = x_379_cast_fp16, y = const_216_promoted_to_fp16)[name = string("op_7706_cast_fp16")]; bool input_309_interleave_0 = const()[name = string("input_309_interleave_0"), val = bool(false)]; tensor input_309_cast_fp16 = concat(axis = var_7700, interleave = input_309_interleave_0, values = (x_379_cast_fp16, var_7706_cast_fp16))[name = string("input_309_cast_fp16")]; tensor normed_365_axes_0 = const()[name = string("normed_365_axes_0"), val = tensor([-1])]; fp16 var_7698_to_fp16 = const()[name = string("op_7698_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_365_cast_fp16 = layer_norm(axes = normed_365_axes_0, epsilon = var_7698_to_fp16, x = input_309_cast_fp16)[name = string("normed_365_cast_fp16")]; tensor var_7711_split_sizes_0 = const()[name = string("op_7711_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_7711_axis_0 = const()[name = string("op_7711_axis_0"), val = int32(-1)]; tensor var_7711_cast_fp16_0, tensor var_7711_cast_fp16_1 = split(axis = var_7711_axis_0, split_sizes = var_7711_split_sizes_0, x = normed_365_cast_fp16)[name = string("op_7711_cast_fp16")]; tensor const_217_to_fp16 = const()[name = string("const_217_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2264649728)))]; tensor var_7714_cast_fp16 = mul(x = var_7711_cast_fp16_0, y = const_217_to_fp16)[name = string("op_7714_cast_fp16")]; tensor var_7722 = const()[name = string("op_7722"), val = tensor([0, 2, 1])]; tensor var_7725_axes_0 = const()[name = string("op_7725_axes_0"), val = tensor([2])]; tensor var_7723_cast_fp16 = transpose(perm = var_7722, x = var_7714_cast_fp16)[name = string("transpose_246")]; tensor var_7725_cast_fp16 = expand_dims(axes = var_7725_axes_0, x = var_7723_cast_fp16)[name = string("op_7725_cast_fp16")]; string var_7741_pad_type_0 = const()[name = string("op_7741_pad_type_0"), val = string("valid")]; tensor var_7741_strides_0 = const()[name = string("op_7741_strides_0"), val = tensor([1, 1])]; tensor var_7741_pad_0 = const()[name = string("op_7741_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7741_dilations_0 = const()[name = string("op_7741_dilations_0"), val = tensor([1, 1])]; int32 var_7741_groups_0 = const()[name = string("op_7741_groups_0"), val = int32(1)]; tensor var_7741 = conv(dilations = var_7741_dilations_0, groups = var_7741_groups_0, pad = var_7741_pad_0, pad_type = var_7741_pad_type_0, strides = var_7741_strides_0, weight = layers_8_self_attn_q_proj_weight_palettized, x = var_7725_cast_fp16)[name = string("op_7741")]; tensor var_7746 = const()[name = string("op_7746"), val = tensor([1, 8, 256, 1])]; tensor var_7747 = reshape(shape = var_7746, x = var_7741)[name = string("op_7747")]; tensor var_7752 = const()[name = string("op_7752"), val = tensor([0, 1, 3, 2])]; tensor var_7762 = const()[name = string("op_7762"), val = tensor([1, 8, 256])]; tensor var_7753 = transpose(perm = var_7752, x = var_7747)[name = string("transpose_245")]; tensor x_383 = reshape(shape = var_7762, x = var_7753)[name = string("x_383")]; int32 var_7768 = const()[name = string("op_7768"), val = int32(-1)]; fp16 const_218_promoted_to_fp16 = const()[name = string("const_218_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7774_cast_fp16 = mul(x = x_383, y = const_218_promoted_to_fp16)[name = string("op_7774_cast_fp16")]; bool input_313_interleave_0 = const()[name = string("input_313_interleave_0"), val = bool(false)]; tensor input_313_cast_fp16 = concat(axis = var_7768, interleave = input_313_interleave_0, values = (x_383, var_7774_cast_fp16))[name = string("input_313_cast_fp16")]; tensor normed_369_axes_0 = const()[name = string("normed_369_axes_0"), val = tensor([-1])]; fp16 var_7766_to_fp16 = const()[name = string("op_7766_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_369_cast_fp16 = layer_norm(axes = normed_369_axes_0, epsilon = var_7766_to_fp16, x = input_313_cast_fp16)[name = string("normed_369_cast_fp16")]; tensor var_7779_split_sizes_0 = const()[name = string("op_7779_split_sizes_0"), val = tensor([256, 256])]; int32 var_7779_axis_0 = const()[name = string("op_7779_axis_0"), val = int32(-1)]; tensor var_7779_cast_fp16_0, tensor var_7779_cast_fp16_1 = split(axis = var_7779_axis_0, split_sizes = var_7779_split_sizes_0, x = normed_369_cast_fp16)[name = string("op_7779_cast_fp16")]; tensor const_219_to_fp16 = const()[name = string("const_219_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2264652864)))]; tensor var_7782_cast_fp16 = mul(x = var_7779_cast_fp16_0, y = const_219_to_fp16)[name = string("op_7782_cast_fp16")]; tensor var_7788 = const()[name = string("op_7788"), val = tensor([1, 8, 1, 256])]; tensor q_67 = reshape(shape = var_7788, x = var_7782_cast_fp16)[name = string("q_67")]; tensor var_7790 = mul(x = q_67, y = cos_1)[name = string("op_7790")]; tensor var_7791_split_sizes_0 = const()[name = string("op_7791_split_sizes_0"), val = tensor([128, 128])]; int32 var_7791_axis_0 = const()[name = string("op_7791_axis_0"), val = int32(-1)]; tensor var_7791_0, tensor var_7791_1 = split(axis = var_7791_axis_0, split_sizes = var_7791_split_sizes_0, x = q_67)[name = string("op_7791")]; fp16 const_220_promoted = const()[name = string("const_220_promoted"), val = fp16(-0x1p+0)]; tensor var_7793 = mul(x = var_7791_1, y = const_220_promoted)[name = string("op_7793")]; int32 var_7795 = const()[name = string("op_7795"), val = int32(-1)]; bool var_7796_interleave_0 = const()[name = string("op_7796_interleave_0"), val = bool(false)]; tensor var_7796 = concat(axis = var_7795, interleave = var_7796_interleave_0, values = (var_7793, var_7791_0))[name = string("op_7796")]; tensor var_7797 = mul(x = var_7796, y = sin_1)[name = string("op_7797")]; tensor q_71 = add(x = var_7790, y = var_7797)[name = string("q_71")]; string var_7810_pad_type_0 = const()[name = string("op_7810_pad_type_0"), val = string("valid")]; tensor var_7810_strides_0 = const()[name = string("op_7810_strides_0"), val = tensor([1, 1])]; tensor var_7810_pad_0 = const()[name = string("op_7810_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7810_dilations_0 = const()[name = string("op_7810_dilations_0"), val = tensor([1, 1])]; int32 var_7810_groups_0 = const()[name = string("op_7810_groups_0"), val = int32(1)]; tensor var_7810 = conv(dilations = var_7810_dilations_0, groups = var_7810_groups_0, pad = var_7810_pad_0, pad_type = var_7810_pad_type_0, strides = var_7810_strides_0, weight = layers_8_self_attn_k_proj_weight_palettized, x = var_7725_cast_fp16)[name = string("op_7810")]; tensor var_7815 = const()[name = string("op_7815"), val = tensor([1, 1, 256, 1])]; tensor var_7816 = reshape(shape = var_7815, x = var_7810)[name = string("op_7816")]; tensor var_7821 = const()[name = string("op_7821"), val = tensor([0, 1, 3, 2])]; string var_7838_pad_type_0 = const()[name = string("op_7838_pad_type_0"), val = string("valid")]; tensor var_7838_strides_0 = const()[name = string("op_7838_strides_0"), val = tensor([1, 1])]; tensor var_7838_pad_0 = const()[name = string("op_7838_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_7838_dilations_0 = const()[name = string("op_7838_dilations_0"), val = tensor([1, 1])]; int32 var_7838_groups_0 = const()[name = string("op_7838_groups_0"), val = int32(1)]; tensor var_7838 = conv(dilations = var_7838_dilations_0, groups = var_7838_groups_0, pad = var_7838_pad_0, pad_type = var_7838_pad_type_0, strides = var_7838_strides_0, weight = layers_8_self_attn_v_proj_weight_palettized, x = var_7725_cast_fp16)[name = string("op_7838")]; tensor var_7843 = const()[name = string("op_7843"), val = tensor([1, 1, 256, 1])]; tensor var_7844 = reshape(shape = var_7843, x = var_7838)[name = string("op_7844")]; tensor var_7849 = const()[name = string("op_7849"), val = tensor([0, 1, 3, 2])]; tensor var_7859 = const()[name = string("op_7859"), val = tensor([1, 1, 256])]; tensor var_7822 = transpose(perm = var_7821, x = var_7816)[name = string("transpose_244")]; tensor x_387 = reshape(shape = var_7859, x = var_7822)[name = string("x_387")]; int32 var_7865 = const()[name = string("op_7865"), val = int32(-1)]; fp16 const_221_promoted_to_fp16 = const()[name = string("const_221_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7871_cast_fp16 = mul(x = x_387, y = const_221_promoted_to_fp16)[name = string("op_7871_cast_fp16")]; bool input_315_interleave_0 = const()[name = string("input_315_interleave_0"), val = bool(false)]; tensor input_315_cast_fp16 = concat(axis = var_7865, interleave = input_315_interleave_0, values = (x_387, var_7871_cast_fp16))[name = string("input_315_cast_fp16")]; tensor normed_373_axes_0 = const()[name = string("normed_373_axes_0"), val = tensor([-1])]; fp16 var_7863_to_fp16 = const()[name = string("op_7863_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_373_cast_fp16 = layer_norm(axes = normed_373_axes_0, epsilon = var_7863_to_fp16, x = input_315_cast_fp16)[name = string("normed_373_cast_fp16")]; tensor var_7876_split_sizes_0 = const()[name = string("op_7876_split_sizes_0"), val = tensor([256, 256])]; int32 var_7876_axis_0 = const()[name = string("op_7876_axis_0"), val = int32(-1)]; tensor var_7876_cast_fp16_0, tensor var_7876_cast_fp16_1 = split(axis = var_7876_axis_0, split_sizes = var_7876_split_sizes_0, x = normed_373_cast_fp16)[name = string("op_7876_cast_fp16")]; tensor const_222_to_fp16 = const()[name = string("const_222_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2264653440)))]; tensor var_7879_cast_fp16 = mul(x = var_7876_cast_fp16_0, y = const_222_to_fp16)[name = string("op_7879_cast_fp16")]; tensor var_7885 = const()[name = string("op_7885"), val = tensor([1, 1, 1, 256])]; tensor q_69 = reshape(shape = var_7885, x = var_7879_cast_fp16)[name = string("q_69")]; fp16 var_7892_promoted_to_fp16 = const()[name = string("op_7892_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7850 = transpose(perm = var_7849, x = var_7844)[name = string("transpose_243")]; tensor var_7893_cast_fp16 = pow(x = var_7850, y = var_7892_promoted_to_fp16)[name = string("op_7893_cast_fp16")]; tensor var_7898_axes_0 = const()[name = string("op_7898_axes_0"), val = tensor([-1])]; bool var_7898_keep_dims_0 = const()[name = string("op_7898_keep_dims_0"), val = bool(true)]; tensor var_7898_cast_fp16 = reduce_mean(axes = var_7898_axes_0, keep_dims = var_7898_keep_dims_0, x = var_7893_cast_fp16)[name = string("op_7898_cast_fp16")]; fp16 var_7900_to_fp16 = const()[name = string("op_7900_to_fp16"), val = fp16(0x1.1p-20)]; tensor mean_sq_17_cast_fp16 = add(x = var_7898_cast_fp16, y = var_7900_to_fp16)[name = string("mean_sq_17_cast_fp16")]; fp16 var_7907_to_fp16 = const()[name = string("op_7907_to_fp16"), val = fp16(-0x1p-1)]; tensor var_7908_cast_fp16 = pow(x = mean_sq_17_cast_fp16, y = var_7907_to_fp16)[name = string("op_7908_cast_fp16")]; tensor var_7909_cast_fp16 = mul(x = var_7850, y = var_7908_cast_fp16)[name = string("op_7909_cast_fp16")]; tensor var_7915 = mul(x = q_69, y = cos_1)[name = string("op_7915")]; tensor var_7916_split_sizes_0 = const()[name = string("op_7916_split_sizes_0"), val = tensor([128, 128])]; int32 var_7916_axis_0 = const()[name = string("op_7916_axis_0"), val = int32(-1)]; tensor var_7916_0, tensor var_7916_1 = split(axis = var_7916_axis_0, split_sizes = var_7916_split_sizes_0, x = q_69)[name = string("op_7916")]; fp16 const_223_promoted = const()[name = string("const_223_promoted"), val = fp16(-0x1p+0)]; tensor var_7918 = mul(x = var_7916_1, y = const_223_promoted)[name = string("op_7918")]; int32 var_7920 = const()[name = string("op_7920"), val = int32(-1)]; bool var_7921_interleave_0 = const()[name = string("op_7921_interleave_0"), val = bool(false)]; tensor var_7921 = concat(axis = var_7920, interleave = var_7921_interleave_0, values = (var_7918, var_7916_0))[name = string("op_7921")]; tensor var_7922 = mul(x = var_7921, y = sin_1)[name = string("op_7922")]; tensor input_317 = add(x = var_7915, y = var_7922)[name = string("input_317")]; tensor var_7927_begin_0 = const()[name = string("op_7927_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_7927_end_0 = const()[name = string("op_7927_end_0"), val = tensor([9, 1, 512, 512])]; tensor var_7927_end_mask_0 = const()[name = string("op_7927_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7927_squeeze_mask_0 = const()[name = string("op_7927_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_7927_cast_fp16 = slice_by_index(begin = var_7927_begin_0, end = var_7927_end_0, end_mask = var_7927_end_mask_0, squeeze_mask = var_7927_squeeze_mask_0, x = coreml_update_state_45)[name = string("op_7927_cast_fp16")]; tensor K_cache_17_axes_0 = const()[name = string("K_cache_17_axes_0"), val = tensor([0])]; tensor K_cache_17_cast_fp16 = expand_dims(axes = K_cache_17_axes_0, x = var_7927_cast_fp16)[name = string("K_cache_17_cast_fp16")]; tensor var_7932_begin_0 = const()[name = string("op_7932_begin_0"), val = tensor([43, 0, 0, 0])]; tensor var_7932_end_0 = const()[name = string("op_7932_end_0"), val = tensor([44, 1, 512, 512])]; tensor var_7932_end_mask_0 = const()[name = string("op_7932_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_7932_squeeze_mask_0 = const()[name = string("op_7932_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_7932_cast_fp16 = slice_by_index(begin = var_7932_begin_0, end = var_7932_end_0, end_mask = var_7932_end_mask_0, squeeze_mask = var_7932_squeeze_mask_0, x = coreml_update_state_45)[name = string("op_7932_cast_fp16")]; tensor V_cache_17_axes_0 = const()[name = string("V_cache_17_axes_0"), val = tensor([0])]; tensor V_cache_17_cast_fp16 = expand_dims(axes = V_cache_17_axes_0, x = var_7932_cast_fp16)[name = string("V_cache_17_cast_fp16")]; tensor k_padded_15_pad_0 = const()[name = string("k_padded_15_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string k_padded_15_mode_0 = const()[name = string("k_padded_15_mode_0"), val = string("constant")]; fp16 const_224_to_fp16 = const()[name = string("const_224_to_fp16"), val = fp16(0x0p+0)]; tensor k_padded_15_cast_fp16 = pad(constant_val = const_224_to_fp16, mode = k_padded_15_mode_0, pad = k_padded_15_pad_0, x = input_317)[name = string("k_padded_15_cast_fp16")]; tensor v_padded_15_pad_0 = const()[name = string("v_padded_15_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string v_padded_15_mode_0 = const()[name = string("v_padded_15_mode_0"), val = string("constant")]; fp16 const_225_to_fp16 = const()[name = string("const_225_to_fp16"), val = fp16(0x0p+0)]; tensor v_padded_15_cast_fp16 = pad(constant_val = const_225_to_fp16, mode = v_padded_15_mode_0, pad = v_padded_15_pad_0, x = var_7909_cast_fp16)[name = string("v_padded_15_cast_fp16")]; tensor var_7950_cast_fp16 = mul(x = K_cache_17_cast_fp16, y = var_3515_cast_fp16)[name = string("op_7950_cast_fp16")]; tensor var_7951_reps_0 = const()[name = string("op_7951_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_7951_cast_fp16 = tile(reps = var_7951_reps_0, x = k_padded_15_cast_fp16)[name = string("op_7951_cast_fp16")]; tensor var_7952_cast_fp16 = mul(x = var_7951_cast_fp16, y = update_mask)[name = string("op_7952_cast_fp16")]; tensor K_new_17_cast_fp16 = add(x = var_7950_cast_fp16, y = var_7952_cast_fp16)[name = string("K_new_17_cast_fp16")]; tensor var_7958_cast_fp16 = mul(x = V_cache_17_cast_fp16, y = var_3515_cast_fp16)[name = string("op_7958_cast_fp16")]; tensor var_7959_reps_0 = const()[name = string("op_7959_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_7959_cast_fp16 = tile(reps = var_7959_reps_0, x = v_padded_15_cast_fp16)[name = string("op_7959_cast_fp16")]; tensor var_7960_cast_fp16 = mul(x = var_7959_cast_fp16, y = update_mask)[name = string("op_7960_cast_fp16")]; tensor V_new_17_cast_fp16 = add(x = var_7958_cast_fp16, y = var_7960_cast_fp16)[name = string("V_new_17_cast_fp16")]; tensor var_7964_axes_0 = const()[name = string("op_7964_axes_0"), val = tensor([0])]; tensor var_7964_cast_fp16 = squeeze(axes = var_7964_axes_0, x = K_new_17_cast_fp16)[name = string("op_7964_cast_fp16")]; tensor concat_64 = const()[name = string("concat_64"), val = tensor([8, 0, 0, 0])]; tensor concat_65 = const()[name = string("concat_65"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_64, begin_mask = kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_65, end_mask = kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_17_stride_0, update = var_7964_cast_fp16, x = coreml_update_state_45)[name = string("kv_cache_0_internal_tensor_assign_17_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_17_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_46_write_state")]; tensor coreml_update_state_46 = read_state(input = kv_cache_0)[name = string("coreml_update_state_46")]; tensor var_7971_axes_0 = const()[name = string("op_7971_axes_0"), val = tensor([0])]; tensor var_7971_cast_fp16 = squeeze(axes = var_7971_axes_0, x = V_new_17_cast_fp16)[name = string("op_7971_cast_fp16")]; tensor concat_66 = const()[name = string("concat_66"), val = tensor([43, 0, 0, 0])]; tensor concat_67 = const()[name = string("concat_67"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_66, begin_mask = kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_67, end_mask = kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_18_stride_0, update = var_7971_cast_fp16, x = coreml_update_state_46)[name = string("kv_cache_0_internal_tensor_assign_18_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_18_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_47_write_state")]; tensor coreml_update_state_47 = read_state(input = kv_cache_0)[name = string("coreml_update_state_47")]; tensor K_for_attn_17_begin_0 = const()[name = string("K_for_attn_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor K_for_attn_17_end_0 = const()[name = string("K_for_attn_17_end_0"), val = tensor([1, 1, 512, 256])]; tensor K_for_attn_17_end_mask_0 = const()[name = string("K_for_attn_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor K_for_attn_17_cast_fp16 = slice_by_index(begin = K_for_attn_17_begin_0, end = K_for_attn_17_end_0, end_mask = K_for_attn_17_end_mask_0, x = K_new_17_cast_fp16)[name = string("K_for_attn_17_cast_fp16")]; tensor V_for_attn_17_begin_0 = const()[name = string("V_for_attn_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor V_for_attn_17_end_0 = const()[name = string("V_for_attn_17_end_0"), val = tensor([1, 1, 512, 256])]; tensor V_for_attn_17_end_mask_0 = const()[name = string("V_for_attn_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor V_for_attn_17_cast_fp16 = slice_by_index(begin = V_for_attn_17_begin_0, end = V_for_attn_17_end_0, end_mask = V_for_attn_17_end_mask_0, x = V_new_17_cast_fp16)[name = string("V_for_attn_17_cast_fp16")]; tensor transpose_32_perm_0 = const()[name = string("transpose_32_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_16_reps_0 = const()[name = string("tile_16_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_32_cast_fp16 = transpose(perm = transpose_32_perm_0, x = K_for_attn_17_cast_fp16)[name = string("transpose_242")]; tensor tile_16_cast_fp16 = tile(reps = tile_16_reps_0, x = transpose_32_cast_fp16)[name = string("tile_16_cast_fp16")]; tensor concat_68 = const()[name = string("concat_68"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_32_cast_fp16 = reshape(shape = concat_68, x = tile_16_cast_fp16)[name = string("reshape_32_cast_fp16")]; tensor transpose_33_perm_0 = const()[name = string("transpose_33_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_69 = const()[name = string("concat_69"), val = tensor([-1, 1, 512, 256])]; tensor transpose_33_cast_fp16 = transpose(perm = transpose_33_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_241")]; tensor reshape_33_cast_fp16 = reshape(shape = concat_69, x = transpose_33_cast_fp16)[name = string("reshape_33_cast_fp16")]; tensor transpose_148_perm_0 = const()[name = string("transpose_148_perm_0"), val = tensor([1, 0, -1, -2])]; tensor transpose_34_perm_0 = const()[name = string("transpose_34_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_17_reps_0 = const()[name = string("tile_17_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_34_cast_fp16 = transpose(perm = transpose_34_perm_0, x = V_for_attn_17_cast_fp16)[name = string("transpose_240")]; tensor tile_17_cast_fp16 = tile(reps = tile_17_reps_0, x = transpose_34_cast_fp16)[name = string("tile_17_cast_fp16")]; tensor concat_70 = const()[name = string("concat_70"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_34_cast_fp16 = reshape(shape = concat_70, x = tile_17_cast_fp16)[name = string("reshape_34_cast_fp16")]; tensor transpose_35_perm_0 = const()[name = string("transpose_35_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_71 = const()[name = string("concat_71"), val = tensor([-1, 1, 512, 256])]; tensor transpose_35_cast_fp16 = transpose(perm = transpose_35_perm_0, x = reshape_34_cast_fp16)[name = string("transpose_239")]; tensor reshape_35_cast_fp16 = reshape(shape = concat_71, x = transpose_35_cast_fp16)[name = string("reshape_35_cast_fp16")]; tensor V_expanded_17_perm_0 = const()[name = string("V_expanded_17_perm_0"), val = tensor([1, 0, -2, -1])]; bool var_8008_transpose_x_0 = const()[name = string("op_8008_transpose_x_0"), val = bool(false)]; bool var_8008_transpose_y_0 = const()[name = string("op_8008_transpose_y_0"), val = bool(false)]; tensor transpose_148_cast_fp16 = transpose(perm = transpose_148_perm_0, x = reshape_33_cast_fp16)[name = string("transpose_238")]; tensor var_8008_cast_fp16 = matmul(transpose_x = var_8008_transpose_x_0, transpose_y = var_8008_transpose_y_0, x = q_71, y = transpose_148_cast_fp16)[name = string("op_8008_cast_fp16")]; tensor attn_weights_51_cast_fp16 = add(x = var_8008_cast_fp16, y = causal_mask)[name = string("attn_weights_51_cast_fp16")]; int32 var_8018 = const()[name = string("op_8018"), val = int32(-1)]; tensor var_8020_cast_fp16 = softmax(axis = var_8018, x = attn_weights_51_cast_fp16)[name = string("op_8020_cast_fp16")]; bool var_8036_transpose_x_0 = const()[name = string("op_8036_transpose_x_0"), val = bool(false)]; bool var_8036_transpose_y_0 = const()[name = string("op_8036_transpose_y_0"), val = bool(false)]; tensor V_expanded_17_cast_fp16 = transpose(perm = V_expanded_17_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_237")]; tensor var_8036_cast_fp16 = matmul(transpose_x = var_8036_transpose_x_0, transpose_y = var_8036_transpose_y_0, x = var_8020_cast_fp16, y = V_expanded_17_cast_fp16)[name = string("op_8036_cast_fp16")]; tensor var_8046 = const()[name = string("op_8046"), val = tensor([0, 2, 1, 3])]; tensor var_8053 = const()[name = string("op_8053"), val = tensor([1, 1, -1])]; tensor var_8047 = transpose(perm = var_8046, x = var_8036_cast_fp16)[name = string("transpose_236")]; tensor attn_output_51 = reshape(shape = var_8053, x = var_8047)[name = string("attn_output_51")]; tensor var_8058 = const()[name = string("op_8058"), val = tensor([0, 2, 1])]; tensor squeeze_8_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2264654016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2266226944))))[name = string("squeeze_8_palettized")]; string var_8074_pad_type_0 = const()[name = string("op_8074_pad_type_0"), val = string("valid")]; int32 var_8074_groups_0 = const()[name = string("op_8074_groups_0"), val = int32(1)]; tensor var_8074_strides_0 = const()[name = string("op_8074_strides_0"), val = tensor([1])]; tensor var_8074_pad_0 = const()[name = string("op_8074_pad_0"), val = tensor([0, 0])]; tensor var_8074_dilations_0 = const()[name = string("op_8074_dilations_0"), val = tensor([1])]; tensor var_8059 = transpose(perm = var_8058, x = attn_output_51)[name = string("transpose_235")]; tensor var_8074 = conv(dilations = var_8074_dilations_0, groups = var_8074_groups_0, pad = var_8074_pad_0, pad_type = var_8074_pad_type_0, strides = var_8074_strides_0, weight = squeeze_8_palettized, x = var_8059)[name = string("op_8074")]; tensor var_8078 = const()[name = string("op_8078"), val = tensor([0, 2, 1])]; int32 var_8084 = const()[name = string("op_8084"), val = int32(-1)]; fp16 const_226_promoted_to_fp16 = const()[name = string("const_226_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_393 = transpose(perm = var_8078, x = var_8074)[name = string("transpose_234")]; tensor var_8090_cast_fp16 = mul(x = x_393, y = const_226_promoted_to_fp16)[name = string("op_8090_cast_fp16")]; bool input_323_interleave_0 = const()[name = string("input_323_interleave_0"), val = bool(false)]; tensor input_323_cast_fp16 = concat(axis = var_8084, interleave = input_323_interleave_0, values = (x_393, var_8090_cast_fp16))[name = string("input_323_cast_fp16")]; tensor normed_377_axes_0 = const()[name = string("normed_377_axes_0"), val = tensor([-1])]; fp16 var_8082_to_fp16 = const()[name = string("op_8082_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_377_cast_fp16 = layer_norm(axes = normed_377_axes_0, epsilon = var_8082_to_fp16, x = input_323_cast_fp16)[name = string("normed_377_cast_fp16")]; tensor var_8095_split_sizes_0 = const()[name = string("op_8095_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_8095_axis_0 = const()[name = string("op_8095_axis_0"), val = int32(-1)]; tensor var_8095_cast_fp16_0, tensor var_8095_cast_fp16_1 = split(axis = var_8095_axis_0, split_sizes = var_8095_split_sizes_0, x = normed_377_cast_fp16)[name = string("op_8095_cast_fp16")]; tensor const_227_to_fp16 = const()[name = string("const_227_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2266228544)))]; tensor var_8098_cast_fp16 = mul(x = var_8095_cast_fp16_0, y = const_227_to_fp16)[name = string("op_8098_cast_fp16")]; tensor x_397_cast_fp16 = add(x = x_379_cast_fp16, y = var_8098_cast_fp16)[name = string("x_397_cast_fp16")]; int32 var_8105 = const()[name = string("op_8105"), val = int32(-1)]; fp16 const_228_promoted_to_fp16 = const()[name = string("const_228_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8111_cast_fp16 = mul(x = x_397_cast_fp16, y = const_228_promoted_to_fp16)[name = string("op_8111_cast_fp16")]; bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; tensor input_325_cast_fp16 = concat(axis = var_8105, interleave = input_325_interleave_0, values = (x_397_cast_fp16, var_8111_cast_fp16))[name = string("input_325_cast_fp16")]; tensor normed_381_axes_0 = const()[name = string("normed_381_axes_0"), val = tensor([-1])]; fp16 var_8103_to_fp16 = const()[name = string("op_8103_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_381_cast_fp16 = layer_norm(axes = normed_381_axes_0, epsilon = var_8103_to_fp16, x = input_325_cast_fp16)[name = string("normed_381_cast_fp16")]; tensor var_8116_split_sizes_0 = const()[name = string("op_8116_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_8116_axis_0 = const()[name = string("op_8116_axis_0"), val = int32(-1)]; tensor var_8116_cast_fp16_0, tensor var_8116_cast_fp16_1 = split(axis = var_8116_axis_0, split_sizes = var_8116_split_sizes_0, x = normed_381_cast_fp16)[name = string("op_8116_cast_fp16")]; tensor const_229_to_fp16 = const()[name = string("const_229_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2266231680)))]; tensor var_8119_cast_fp16 = mul(x = var_8116_cast_fp16_0, y = const_229_to_fp16)[name = string("op_8119_cast_fp16")]; tensor var_8132 = const()[name = string("op_8132"), val = tensor([0, 2, 1])]; tensor input_327_axes_0 = const()[name = string("input_327_axes_0"), val = tensor([2])]; tensor var_8133 = transpose(perm = var_8132, x = var_8119_cast_fp16)[name = string("transpose_233")]; tensor input_327 = expand_dims(axes = input_327_axes_0, x = var_8133)[name = string("input_327")]; string gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")]; tensor gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor([1, 1])]; tensor gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor([1, 1])]; int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)]; tensor gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = layers_8_mlp_gate_proj_weight_palettized, x = input_327)[name = string("gate_33")]; string up_17_pad_type_0 = const()[name = string("up_17_pad_type_0"), val = string("valid")]; tensor up_17_strides_0 = const()[name = string("up_17_strides_0"), val = tensor([1, 1])]; tensor up_17_pad_0 = const()[name = string("up_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_17_dilations_0 = const()[name = string("up_17_dilations_0"), val = tensor([1, 1])]; int32 up_17_groups_0 = const()[name = string("up_17_groups_0"), val = int32(1)]; tensor up_17 = conv(dilations = up_17_dilations_0, groups = up_17_groups_0, pad = up_17_pad_0, pad_type = up_17_pad_type_0, strides = up_17_strides_0, weight = layers_8_mlp_up_proj_weight_palettized, x = input_327)[name = string("up_17")]; string gate_35_mode_0 = const()[name = string("gate_35_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_35 = gelu(mode = gate_35_mode_0, x = gate_33)[name = string("gate_35")]; tensor input_329 = mul(x = gate_35, y = up_17)[name = string("input_329")]; string mlp_out_17_pad_type_0 = const()[name = string("mlp_out_17_pad_type_0"), val = string("valid")]; tensor mlp_out_17_strides_0 = const()[name = string("mlp_out_17_strides_0"), val = tensor([1, 1])]; tensor mlp_out_17_pad_0 = const()[name = string("mlp_out_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_17_dilations_0 = const()[name = string("mlp_out_17_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_17_groups_0 = const()[name = string("mlp_out_17_groups_0"), val = int32(1)]; tensor mlp_out_17 = conv(dilations = mlp_out_17_dilations_0, groups = mlp_out_17_groups_0, pad = mlp_out_17_pad_0, pad_type = mlp_out_17_pad_type_0, strides = mlp_out_17_strides_0, weight = layers_8_mlp_down_proj_weight_palettized, x = input_329)[name = string("mlp_out_17")]; tensor var_8173_axes_0 = const()[name = string("op_8173_axes_0"), val = tensor([2])]; tensor var_8173 = squeeze(axes = var_8173_axes_0, x = mlp_out_17)[name = string("op_8173")]; tensor var_8177 = const()[name = string("op_8177"), val = tensor([0, 2, 1])]; int32 var_8183 = const()[name = string("op_8183"), val = int32(-1)]; fp16 const_230_promoted_to_fp16 = const()[name = string("const_230_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_401 = transpose(perm = var_8177, x = var_8173)[name = string("transpose_232")]; tensor var_8189_cast_fp16 = mul(x = x_401, y = const_230_promoted_to_fp16)[name = string("op_8189_cast_fp16")]; bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; tensor input_331_cast_fp16 = concat(axis = var_8183, interleave = input_331_interleave_0, values = (x_401, var_8189_cast_fp16))[name = string("input_331_cast_fp16")]; tensor normed_385_axes_0 = const()[name = string("normed_385_axes_0"), val = tensor([-1])]; fp16 var_8181_to_fp16 = const()[name = string("op_8181_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_385_cast_fp16 = layer_norm(axes = normed_385_axes_0, epsilon = var_8181_to_fp16, x = input_331_cast_fp16)[name = string("normed_385_cast_fp16")]; tensor var_8194_split_sizes_0 = const()[name = string("op_8194_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_8194_axis_0 = const()[name = string("op_8194_axis_0"), val = int32(-1)]; tensor var_8194_cast_fp16_0, tensor var_8194_cast_fp16_1 = split(axis = var_8194_axis_0, split_sizes = var_8194_split_sizes_0, x = normed_385_cast_fp16)[name = string("op_8194_cast_fp16")]; tensor const_231_to_fp16 = const()[name = string("const_231_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2266234816)))]; tensor var_8197_cast_fp16 = mul(x = var_8194_cast_fp16_0, y = const_231_to_fp16)[name = string("op_8197_cast_fp16")]; tensor hidden_states_109_cast_fp16 = add(x = x_397_cast_fp16, y = var_8197_cast_fp16)[name = string("hidden_states_109_cast_fp16")]; tensor per_layer_slice_17_begin_0 = const()[name = string("per_layer_slice_17_begin_0"), val = tensor([0, 0, 2048])]; tensor per_layer_slice_17_end_0 = const()[name = string("per_layer_slice_17_end_0"), val = tensor([1, 1, 2304])]; tensor per_layer_slice_17_end_mask_0 = const()[name = string("per_layer_slice_17_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_17 = slice_by_index(begin = per_layer_slice_17_begin_0, end = per_layer_slice_17_end_0, end_mask = per_layer_slice_17_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_17")]; tensor gated_33 = linear(bias = linear_1_bias_0, weight = layers_8_per_layer_input_gate_weight_palettized, x = hidden_states_109_cast_fp16)[name = string("linear_17")]; string gated_35_mode_0 = const()[name = string("gated_35_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_35 = gelu(mode = gated_35_mode_0, x = gated_33)[name = string("gated_35")]; tensor input_335 = mul(x = gated_35, y = per_layer_slice_17)[name = string("input_335")]; tensor x_405 = linear(bias = linear_2_bias_0, weight = layers_8_per_layer_projection_weight_palettized, x = input_335)[name = string("linear_18")]; int32 var_8234 = const()[name = string("op_8234"), val = int32(-1)]; fp16 const_232_promoted_to_fp16 = const()[name = string("const_232_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8240_cast_fp16 = mul(x = x_405, y = const_232_promoted_to_fp16)[name = string("op_8240_cast_fp16")]; bool input_337_interleave_0 = const()[name = string("input_337_interleave_0"), val = bool(false)]; tensor input_337_cast_fp16 = concat(axis = var_8234, interleave = input_337_interleave_0, values = (x_405, var_8240_cast_fp16))[name = string("input_337_cast_fp16")]; tensor normed_389_axes_0 = const()[name = string("normed_389_axes_0"), val = tensor([-1])]; fp16 var_8232_to_fp16 = const()[name = string("op_8232_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_389_cast_fp16 = layer_norm(axes = normed_389_axes_0, epsilon = var_8232_to_fp16, x = input_337_cast_fp16)[name = string("normed_389_cast_fp16")]; tensor var_8245_split_sizes_0 = const()[name = string("op_8245_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_8245_axis_0 = const()[name = string("op_8245_axis_0"), val = int32(-1)]; tensor var_8245_cast_fp16_0, tensor var_8245_cast_fp16_1 = split(axis = var_8245_axis_0, split_sizes = var_8245_split_sizes_0, x = normed_389_cast_fp16)[name = string("op_8245_cast_fp16")]; tensor const_233_to_fp16 = const()[name = string("const_233_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2266237952)))]; tensor var_8248_cast_fp16 = mul(x = var_8245_cast_fp16_0, y = const_233_to_fp16)[name = string("op_8248_cast_fp16")]; tensor hidden_states_113_cast_fp16 = add(x = hidden_states_109_cast_fp16, y = var_8248_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; tensor layers_8_layer_scalar_to_fp16 = const()[name = string("layers_8_layer_scalar_to_fp16"), val = tensor([0x1.82p-2])]; tensor x_409_cast_fp16 = mul(x = hidden_states_113_cast_fp16, y = layers_8_layer_scalar_to_fp16)[name = string("x_409_cast_fp16")]; int32 var_8256 = const()[name = string("op_8256"), val = int32(-1)]; fp16 const_234_promoted_to_fp16 = const()[name = string("const_234_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8262_cast_fp16 = mul(x = x_409_cast_fp16, y = const_234_promoted_to_fp16)[name = string("op_8262_cast_fp16")]; bool input_339_interleave_0 = const()[name = string("input_339_interleave_0"), val = bool(false)]; tensor input_339_cast_fp16 = concat(axis = var_8256, interleave = input_339_interleave_0, values = (x_409_cast_fp16, var_8262_cast_fp16))[name = string("input_339_cast_fp16")]; tensor normed_393_axes_0 = const()[name = string("normed_393_axes_0"), val = tensor([-1])]; fp16 var_8254_to_fp16 = const()[name = string("op_8254_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_393_cast_fp16 = layer_norm(axes = normed_393_axes_0, epsilon = var_8254_to_fp16, x = input_339_cast_fp16)[name = string("normed_393_cast_fp16")]; tensor var_8267_split_sizes_0 = const()[name = string("op_8267_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_8267_axis_0 = const()[name = string("op_8267_axis_0"), val = int32(-1)]; tensor var_8267_cast_fp16_0, tensor var_8267_cast_fp16_1 = split(axis = var_8267_axis_0, split_sizes = var_8267_split_sizes_0, x = normed_393_cast_fp16)[name = string("op_8267_cast_fp16")]; tensor const_235_to_fp16 = const()[name = string("const_235_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2266241088)))]; tensor var_8270_cast_fp16 = mul(x = var_8267_cast_fp16_0, y = const_235_to_fp16)[name = string("op_8270_cast_fp16")]; tensor var_8278 = const()[name = string("op_8278"), val = tensor([0, 2, 1])]; tensor var_8281_axes_0 = const()[name = string("op_8281_axes_0"), val = tensor([2])]; tensor var_8279_cast_fp16 = transpose(perm = var_8278, x = var_8270_cast_fp16)[name = string("transpose_231")]; tensor var_8281_cast_fp16 = expand_dims(axes = var_8281_axes_0, x = var_8279_cast_fp16)[name = string("op_8281_cast_fp16")]; string var_8297_pad_type_0 = const()[name = string("op_8297_pad_type_0"), val = string("valid")]; tensor var_8297_strides_0 = const()[name = string("op_8297_strides_0"), val = tensor([1, 1])]; tensor var_8297_pad_0 = const()[name = string("op_8297_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8297_dilations_0 = const()[name = string("op_8297_dilations_0"), val = tensor([1, 1])]; int32 var_8297_groups_0 = const()[name = string("op_8297_groups_0"), val = int32(1)]; tensor var_8297 = conv(dilations = var_8297_dilations_0, groups = var_8297_groups_0, pad = var_8297_pad_0, pad_type = var_8297_pad_type_0, strides = var_8297_strides_0, weight = layers_9_self_attn_q_proj_weight_palettized, x = var_8281_cast_fp16)[name = string("op_8297")]; tensor var_8302 = const()[name = string("op_8302"), val = tensor([1, 8, 512, 1])]; tensor var_8303 = reshape(shape = var_8302, x = var_8297)[name = string("op_8303")]; tensor var_8308 = const()[name = string("op_8308"), val = tensor([0, 1, 3, 2])]; tensor var_8318 = const()[name = string("op_8318"), val = tensor([1, 8, 512])]; tensor var_8309 = transpose(perm = var_8308, x = var_8303)[name = string("transpose_230")]; tensor x_413 = reshape(shape = var_8318, x = var_8309)[name = string("x_413")]; int32 var_8324 = const()[name = string("op_8324"), val = int32(-1)]; fp16 const_236_promoted_to_fp16 = const()[name = string("const_236_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8330_cast_fp16 = mul(x = x_413, y = const_236_promoted_to_fp16)[name = string("op_8330_cast_fp16")]; bool input_343_interleave_0 = const()[name = string("input_343_interleave_0"), val = bool(false)]; tensor input_343_cast_fp16 = concat(axis = var_8324, interleave = input_343_interleave_0, values = (x_413, var_8330_cast_fp16))[name = string("input_343_cast_fp16")]; tensor normed_397_axes_0 = const()[name = string("normed_397_axes_0"), val = tensor([-1])]; fp16 var_8322_to_fp16 = const()[name = string("op_8322_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_397_cast_fp16 = layer_norm(axes = normed_397_axes_0, epsilon = var_8322_to_fp16, x = input_343_cast_fp16)[name = string("normed_397_cast_fp16")]; tensor var_8335_split_sizes_0 = const()[name = string("op_8335_split_sizes_0"), val = tensor([512, 512])]; int32 var_8335_axis_0 = const()[name = string("op_8335_axis_0"), val = int32(-1)]; tensor var_8335_cast_fp16_0, tensor var_8335_cast_fp16_1 = split(axis = var_8335_axis_0, split_sizes = var_8335_split_sizes_0, x = normed_397_cast_fp16)[name = string("op_8335_cast_fp16")]; tensor const_237_to_fp16 = const()[name = string("const_237_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2266244224)))]; tensor var_8338_cast_fp16 = mul(x = var_8335_cast_fp16_0, y = const_237_to_fp16)[name = string("op_8338_cast_fp16")]; tensor var_8344 = const()[name = string("op_8344"), val = tensor([1, 8, 1, 512])]; tensor q_75 = reshape(shape = var_8344, x = var_8338_cast_fp16)[name = string("q_75")]; tensor var_8346 = mul(x = q_75, y = cos)[name = string("op_8346")]; tensor var_8347_split_sizes_0 = const()[name = string("op_8347_split_sizes_0"), val = tensor([256, 256])]; int32 var_8347_axis_0 = const()[name = string("op_8347_axis_0"), val = int32(-1)]; tensor var_8347_0, tensor var_8347_1 = split(axis = var_8347_axis_0, split_sizes = var_8347_split_sizes_0, x = q_75)[name = string("op_8347")]; fp16 const_238_promoted = const()[name = string("const_238_promoted"), val = fp16(-0x1p+0)]; tensor var_8349 = mul(x = var_8347_1, y = const_238_promoted)[name = string("op_8349")]; int32 var_8351 = const()[name = string("op_8351"), val = int32(-1)]; bool var_8352_interleave_0 = const()[name = string("op_8352_interleave_0"), val = bool(false)]; tensor var_8352 = concat(axis = var_8351, interleave = var_8352_interleave_0, values = (var_8349, var_8347_0))[name = string("op_8352")]; tensor var_8353 = mul(x = var_8352, y = sin)[name = string("op_8353")]; tensor q_79 = add(x = var_8346, y = var_8353)[name = string("q_79")]; string var_8366_pad_type_0 = const()[name = string("op_8366_pad_type_0"), val = string("valid")]; tensor var_8366_strides_0 = const()[name = string("op_8366_strides_0"), val = tensor([1, 1])]; tensor var_8366_pad_0 = const()[name = string("op_8366_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8366_dilations_0 = const()[name = string("op_8366_dilations_0"), val = tensor([1, 1])]; int32 var_8366_groups_0 = const()[name = string("op_8366_groups_0"), val = int32(1)]; tensor var_8366 = conv(dilations = var_8366_dilations_0, groups = var_8366_groups_0, pad = var_8366_pad_0, pad_type = var_8366_pad_type_0, strides = var_8366_strides_0, weight = layers_9_self_attn_k_proj_weight_palettized, x = var_8281_cast_fp16)[name = string("op_8366")]; tensor var_8371 = const()[name = string("op_8371"), val = tensor([1, 1, 512, 1])]; tensor var_8372 = reshape(shape = var_8371, x = var_8366)[name = string("op_8372")]; tensor var_8377 = const()[name = string("op_8377"), val = tensor([0, 1, 3, 2])]; string var_8394_pad_type_0 = const()[name = string("op_8394_pad_type_0"), val = string("valid")]; tensor var_8394_strides_0 = const()[name = string("op_8394_strides_0"), val = tensor([1, 1])]; tensor var_8394_pad_0 = const()[name = string("op_8394_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8394_dilations_0 = const()[name = string("op_8394_dilations_0"), val = tensor([1, 1])]; int32 var_8394_groups_0 = const()[name = string("op_8394_groups_0"), val = int32(1)]; tensor var_8394 = conv(dilations = var_8394_dilations_0, groups = var_8394_groups_0, pad = var_8394_pad_0, pad_type = var_8394_pad_type_0, strides = var_8394_strides_0, weight = layers_9_self_attn_v_proj_weight_palettized, x = var_8281_cast_fp16)[name = string("op_8394")]; tensor var_8399 = const()[name = string("op_8399"), val = tensor([1, 1, 512, 1])]; tensor var_8400 = reshape(shape = var_8399, x = var_8394)[name = string("op_8400")]; tensor var_8405 = const()[name = string("op_8405"), val = tensor([0, 1, 3, 2])]; tensor var_8415 = const()[name = string("op_8415"), val = tensor([1, 1, 512])]; tensor var_8378 = transpose(perm = var_8377, x = var_8372)[name = string("transpose_229")]; tensor x_417 = reshape(shape = var_8415, x = var_8378)[name = string("x_417")]; int32 var_8421 = const()[name = string("op_8421"), val = int32(-1)]; fp16 const_239_promoted_to_fp16 = const()[name = string("const_239_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8427_cast_fp16 = mul(x = x_417, y = const_239_promoted_to_fp16)[name = string("op_8427_cast_fp16")]; bool input_345_interleave_0 = const()[name = string("input_345_interleave_0"), val = bool(false)]; tensor input_345_cast_fp16 = concat(axis = var_8421, interleave = input_345_interleave_0, values = (x_417, var_8427_cast_fp16))[name = string("input_345_cast_fp16")]; tensor normed_401_axes_0 = const()[name = string("normed_401_axes_0"), val = tensor([-1])]; fp16 var_8419_to_fp16 = const()[name = string("op_8419_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_401_cast_fp16 = layer_norm(axes = normed_401_axes_0, epsilon = var_8419_to_fp16, x = input_345_cast_fp16)[name = string("normed_401_cast_fp16")]; tensor var_8432_split_sizes_0 = const()[name = string("op_8432_split_sizes_0"), val = tensor([512, 512])]; int32 var_8432_axis_0 = const()[name = string("op_8432_axis_0"), val = int32(-1)]; tensor var_8432_cast_fp16_0, tensor var_8432_cast_fp16_1 = split(axis = var_8432_axis_0, split_sizes = var_8432_split_sizes_0, x = normed_401_cast_fp16)[name = string("op_8432_cast_fp16")]; tensor const_240_to_fp16 = const()[name = string("const_240_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2266245312)))]; tensor var_8435_cast_fp16 = mul(x = var_8432_cast_fp16_0, y = const_240_to_fp16)[name = string("op_8435_cast_fp16")]; tensor var_8441 = const()[name = string("op_8441"), val = tensor([1, 1, 1, 512])]; tensor q_77 = reshape(shape = var_8441, x = var_8435_cast_fp16)[name = string("q_77")]; fp16 var_8448_promoted_to_fp16 = const()[name = string("op_8448_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8406 = transpose(perm = var_8405, x = var_8400)[name = string("transpose_228")]; tensor var_8449_cast_fp16 = pow(x = var_8406, y = var_8448_promoted_to_fp16)[name = string("op_8449_cast_fp16")]; tensor var_8454_axes_0 = const()[name = string("op_8454_axes_0"), val = tensor([-1])]; bool var_8454_keep_dims_0 = const()[name = string("op_8454_keep_dims_0"), val = bool(true)]; tensor var_8454_cast_fp16 = reduce_mean(axes = var_8454_axes_0, keep_dims = var_8454_keep_dims_0, x = var_8449_cast_fp16)[name = string("op_8454_cast_fp16")]; fp16 var_8456_to_fp16 = const()[name = string("op_8456_to_fp16"), val = fp16(0x1.1p-20)]; tensor mean_sq_19_cast_fp16 = add(x = var_8454_cast_fp16, y = var_8456_to_fp16)[name = string("mean_sq_19_cast_fp16")]; fp16 var_8463_to_fp16 = const()[name = string("op_8463_to_fp16"), val = fp16(-0x1p-1)]; tensor var_8464_cast_fp16 = pow(x = mean_sq_19_cast_fp16, y = var_8463_to_fp16)[name = string("op_8464_cast_fp16")]; tensor var_8465_cast_fp16 = mul(x = var_8406, y = var_8464_cast_fp16)[name = string("op_8465_cast_fp16")]; tensor var_8471 = mul(x = q_77, y = cos)[name = string("op_8471")]; tensor var_8472_split_sizes_0 = const()[name = string("op_8472_split_sizes_0"), val = tensor([256, 256])]; int32 var_8472_axis_0 = const()[name = string("op_8472_axis_0"), val = int32(-1)]; tensor var_8472_0, tensor var_8472_1 = split(axis = var_8472_axis_0, split_sizes = var_8472_split_sizes_0, x = q_77)[name = string("op_8472")]; fp16 const_241_promoted = const()[name = string("const_241_promoted"), val = fp16(-0x1p+0)]; tensor var_8474 = mul(x = var_8472_1, y = const_241_promoted)[name = string("op_8474")]; int32 var_8476 = const()[name = string("op_8476"), val = int32(-1)]; bool var_8477_interleave_0 = const()[name = string("op_8477_interleave_0"), val = bool(false)]; tensor var_8477 = concat(axis = var_8476, interleave = var_8477_interleave_0, values = (var_8474, var_8472_0))[name = string("op_8477")]; tensor var_8478 = mul(x = var_8477, y = sin)[name = string("op_8478")]; tensor k_23 = add(x = var_8471, y = var_8478)[name = string("k_23")]; tensor var_8483_begin_0 = const()[name = string("op_8483_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_8483_end_0 = const()[name = string("op_8483_end_0"), val = tensor([10, 1, 512, 512])]; tensor var_8483_end_mask_0 = const()[name = string("op_8483_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8483_squeeze_mask_0 = const()[name = string("op_8483_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_8483_cast_fp16 = slice_by_index(begin = var_8483_begin_0, end = var_8483_end_0, end_mask = var_8483_end_mask_0, squeeze_mask = var_8483_squeeze_mask_0, x = coreml_update_state_47)[name = string("op_8483_cast_fp16")]; tensor K_cache_19_axes_0 = const()[name = string("K_cache_19_axes_0"), val = tensor([0])]; tensor K_cache_19_cast_fp16 = expand_dims(axes = K_cache_19_axes_0, x = var_8483_cast_fp16)[name = string("K_cache_19_cast_fp16")]; tensor var_8488_begin_0 = const()[name = string("op_8488_begin_0"), val = tensor([44, 0, 0, 0])]; tensor var_8488_end_0 = const()[name = string("op_8488_end_0"), val = tensor([45, 1, 512, 512])]; tensor var_8488_end_mask_0 = const()[name = string("op_8488_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_8488_squeeze_mask_0 = const()[name = string("op_8488_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_8488_cast_fp16 = slice_by_index(begin = var_8488_begin_0, end = var_8488_end_0, end_mask = var_8488_end_mask_0, squeeze_mask = var_8488_squeeze_mask_0, x = coreml_update_state_47)[name = string("op_8488_cast_fp16")]; tensor V_cache_19_axes_0 = const()[name = string("V_cache_19_axes_0"), val = tensor([0])]; tensor V_cache_19_cast_fp16 = expand_dims(axes = V_cache_19_axes_0, x = var_8488_cast_fp16)[name = string("V_cache_19_cast_fp16")]; tensor var_8494_cast_fp16 = mul(x = K_cache_19_cast_fp16, y = var_3515_cast_fp16)[name = string("op_8494_cast_fp16")]; tensor var_8495_reps_0 = const()[name = string("op_8495_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_8495 = tile(reps = var_8495_reps_0, x = k_23)[name = string("op_8495")]; tensor var_8496_cast_fp16 = mul(x = var_8495, y = update_mask)[name = string("op_8496_cast_fp16")]; tensor K_new_19_cast_fp16 = add(x = var_8494_cast_fp16, y = var_8496_cast_fp16)[name = string("K_new_19_cast_fp16")]; tensor var_8502_cast_fp16 = mul(x = V_cache_19_cast_fp16, y = var_3515_cast_fp16)[name = string("op_8502_cast_fp16")]; tensor var_8503_reps_0 = const()[name = string("op_8503_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_8503 = tile(reps = var_8503_reps_0, x = var_8465_cast_fp16)[name = string("op_8503")]; tensor var_8504_cast_fp16 = mul(x = var_8503, y = update_mask)[name = string("op_8504_cast_fp16")]; tensor V_new_19_cast_fp16 = add(x = var_8502_cast_fp16, y = var_8504_cast_fp16)[name = string("V_new_19_cast_fp16")]; tensor var_8508_axes_0 = const()[name = string("op_8508_axes_0"), val = tensor([0])]; tensor var_8508_cast_fp16 = squeeze(axes = var_8508_axes_0, x = K_new_19_cast_fp16)[name = string("op_8508_cast_fp16")]; tensor concat_72 = const()[name = string("concat_72"), val = tensor([9, 0, 0, 0])]; tensor concat_73 = const()[name = string("concat_73"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_19_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_19_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_19_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_72, begin_mask = kv_cache_0_internal_tensor_assign_19_begin_mask_0, end = concat_73, end_mask = kv_cache_0_internal_tensor_assign_19_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_19_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_19_stride_0, update = var_8508_cast_fp16, x = coreml_update_state_47)[name = string("kv_cache_0_internal_tensor_assign_19_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_19_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_48_write_state")]; tensor coreml_update_state_48 = read_state(input = kv_cache_0)[name = string("coreml_update_state_48")]; tensor var_8515_axes_0 = const()[name = string("op_8515_axes_0"), val = tensor([0])]; tensor var_8515_cast_fp16 = squeeze(axes = var_8515_axes_0, x = V_new_19_cast_fp16)[name = string("op_8515_cast_fp16")]; tensor concat_74 = const()[name = string("concat_74"), val = tensor([44, 0, 0, 0])]; tensor concat_75 = const()[name = string("concat_75"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_20_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_20_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_20_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_74, begin_mask = kv_cache_0_internal_tensor_assign_20_begin_mask_0, end = concat_75, end_mask = kv_cache_0_internal_tensor_assign_20_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_20_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_20_stride_0, update = var_8515_cast_fp16, x = coreml_update_state_48)[name = string("kv_cache_0_internal_tensor_assign_20_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_20_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_49_write_state")]; tensor coreml_update_state_49 = read_state(input = kv_cache_0)[name = string("coreml_update_state_49")]; tensor transpose_36_perm_0 = const()[name = string("transpose_36_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_18_reps_0 = const()[name = string("tile_18_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_36_cast_fp16 = transpose(perm = transpose_36_perm_0, x = K_new_19_cast_fp16)[name = string("transpose_227")]; tensor tile_18_cast_fp16 = tile(reps = tile_18_reps_0, x = transpose_36_cast_fp16)[name = string("tile_18_cast_fp16")]; tensor concat_76 = const()[name = string("concat_76"), val = tensor([8, 1, 1, 512, 512])]; tensor reshape_36_cast_fp16 = reshape(shape = concat_76, x = tile_18_cast_fp16)[name = string("reshape_36_cast_fp16")]; tensor transpose_37_perm_0 = const()[name = string("transpose_37_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_77 = const()[name = string("concat_77"), val = tensor([-1, 1, 512, 512])]; tensor transpose_37_cast_fp16 = transpose(perm = transpose_37_perm_0, x = reshape_36_cast_fp16)[name = string("transpose_226")]; tensor reshape_37_cast_fp16 = reshape(shape = concat_77, x = transpose_37_cast_fp16)[name = string("reshape_37_cast_fp16")]; tensor transpose_149_perm_0 = const()[name = string("transpose_149_perm_0"), val = tensor([1, 0, -1, -2])]; tensor transpose_38_perm_0 = const()[name = string("transpose_38_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_19_reps_0 = const()[name = string("tile_19_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_38_cast_fp16 = transpose(perm = transpose_38_perm_0, x = V_new_19_cast_fp16)[name = string("transpose_225")]; tensor tile_19_cast_fp16 = tile(reps = tile_19_reps_0, x = transpose_38_cast_fp16)[name = string("tile_19_cast_fp16")]; tensor concat_78 = const()[name = string("concat_78"), val = tensor([8, 1, 1, 512, 512])]; tensor reshape_38_cast_fp16 = reshape(shape = concat_78, x = tile_19_cast_fp16)[name = string("reshape_38_cast_fp16")]; tensor transpose_39_perm_0 = const()[name = string("transpose_39_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_79 = const()[name = string("concat_79"), val = tensor([-1, 1, 512, 512])]; tensor transpose_39_cast_fp16 = transpose(perm = transpose_39_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_224")]; tensor reshape_39_cast_fp16 = reshape(shape = concat_79, x = transpose_39_cast_fp16)[name = string("reshape_39_cast_fp16")]; tensor V_expanded_19_perm_0 = const()[name = string("V_expanded_19_perm_0"), val = tensor([1, 0, -2, -1])]; bool var_8552_transpose_x_0 = const()[name = string("op_8552_transpose_x_0"), val = bool(false)]; bool var_8552_transpose_y_0 = const()[name = string("op_8552_transpose_y_0"), val = bool(false)]; tensor transpose_149_cast_fp16 = transpose(perm = transpose_149_perm_0, x = reshape_37_cast_fp16)[name = string("transpose_223")]; tensor var_8552_cast_fp16 = matmul(transpose_x = var_8552_transpose_x_0, transpose_y = var_8552_transpose_y_0, x = q_79, y = transpose_149_cast_fp16)[name = string("op_8552_cast_fp16")]; tensor attn_weights_57_cast_fp16 = add(x = var_8552_cast_fp16, y = causal_mask)[name = string("attn_weights_57_cast_fp16")]; int32 var_8562 = const()[name = string("op_8562"), val = int32(-1)]; tensor var_8564_cast_fp16 = softmax(axis = var_8562, x = attn_weights_57_cast_fp16)[name = string("op_8564_cast_fp16")]; bool var_8580_transpose_x_0 = const()[name = string("op_8580_transpose_x_0"), val = bool(false)]; bool var_8580_transpose_y_0 = const()[name = string("op_8580_transpose_y_0"), val = bool(false)]; tensor V_expanded_19_cast_fp16 = transpose(perm = V_expanded_19_perm_0, x = reshape_39_cast_fp16)[name = string("transpose_222")]; tensor var_8580_cast_fp16 = matmul(transpose_x = var_8580_transpose_x_0, transpose_y = var_8580_transpose_y_0, x = var_8564_cast_fp16, y = V_expanded_19_cast_fp16)[name = string("op_8580_cast_fp16")]; tensor var_8590 = const()[name = string("op_8590"), val = tensor([0, 2, 1, 3])]; tensor var_8597 = const()[name = string("op_8597"), val = tensor([1, 1, -1])]; tensor var_8591 = transpose(perm = var_8590, x = var_8580_cast_fp16)[name = string("transpose_221")]; tensor attn_output_57 = reshape(shape = var_8597, x = var_8591)[name = string("attn_output_57")]; tensor var_8602 = const()[name = string("op_8602"), val = tensor([0, 2, 1])]; tensor squeeze_9_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2266246400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2269392192))))[name = string("squeeze_9_palettized")]; string var_8618_pad_type_0 = const()[name = string("op_8618_pad_type_0"), val = string("valid")]; int32 var_8618_groups_0 = const()[name = string("op_8618_groups_0"), val = int32(1)]; tensor var_8618_strides_0 = const()[name = string("op_8618_strides_0"), val = tensor([1])]; tensor var_8618_pad_0 = const()[name = string("op_8618_pad_0"), val = tensor([0, 0])]; tensor var_8618_dilations_0 = const()[name = string("op_8618_dilations_0"), val = tensor([1])]; tensor var_8603 = transpose(perm = var_8602, x = attn_output_57)[name = string("transpose_220")]; tensor var_8618 = conv(dilations = var_8618_dilations_0, groups = var_8618_groups_0, pad = var_8618_pad_0, pad_type = var_8618_pad_type_0, strides = var_8618_strides_0, weight = squeeze_9_palettized, x = var_8603)[name = string("op_8618")]; tensor var_8622 = const()[name = string("op_8622"), val = tensor([0, 2, 1])]; int32 var_8628 = const()[name = string("op_8628"), val = int32(-1)]; fp16 const_242_promoted_to_fp16 = const()[name = string("const_242_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_423 = transpose(perm = var_8622, x = var_8618)[name = string("transpose_219")]; tensor var_8634_cast_fp16 = mul(x = x_423, y = const_242_promoted_to_fp16)[name = string("op_8634_cast_fp16")]; bool input_349_interleave_0 = const()[name = string("input_349_interleave_0"), val = bool(false)]; tensor input_349_cast_fp16 = concat(axis = var_8628, interleave = input_349_interleave_0, values = (x_423, var_8634_cast_fp16))[name = string("input_349_cast_fp16")]; tensor normed_405_axes_0 = const()[name = string("normed_405_axes_0"), val = tensor([-1])]; fp16 var_8626_to_fp16 = const()[name = string("op_8626_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_405_cast_fp16 = layer_norm(axes = normed_405_axes_0, epsilon = var_8626_to_fp16, x = input_349_cast_fp16)[name = string("normed_405_cast_fp16")]; tensor var_8639_split_sizes_0 = const()[name = string("op_8639_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_8639_axis_0 = const()[name = string("op_8639_axis_0"), val = int32(-1)]; tensor var_8639_cast_fp16_0, tensor var_8639_cast_fp16_1 = split(axis = var_8639_axis_0, split_sizes = var_8639_split_sizes_0, x = normed_405_cast_fp16)[name = string("op_8639_cast_fp16")]; tensor const_243_to_fp16 = const()[name = string("const_243_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2269393792)))]; tensor var_8642_cast_fp16 = mul(x = var_8639_cast_fp16_0, y = const_243_to_fp16)[name = string("op_8642_cast_fp16")]; tensor x_427_cast_fp16 = add(x = x_409_cast_fp16, y = var_8642_cast_fp16)[name = string("x_427_cast_fp16")]; int32 var_8649 = const()[name = string("op_8649"), val = int32(-1)]; fp16 const_244_promoted_to_fp16 = const()[name = string("const_244_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8655_cast_fp16 = mul(x = x_427_cast_fp16, y = const_244_promoted_to_fp16)[name = string("op_8655_cast_fp16")]; bool input_351_interleave_0 = const()[name = string("input_351_interleave_0"), val = bool(false)]; tensor input_351_cast_fp16 = concat(axis = var_8649, interleave = input_351_interleave_0, values = (x_427_cast_fp16, var_8655_cast_fp16))[name = string("input_351_cast_fp16")]; tensor normed_409_axes_0 = const()[name = string("normed_409_axes_0"), val = tensor([-1])]; fp16 var_8647_to_fp16 = const()[name = string("op_8647_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_409_cast_fp16 = layer_norm(axes = normed_409_axes_0, epsilon = var_8647_to_fp16, x = input_351_cast_fp16)[name = string("normed_409_cast_fp16")]; tensor var_8660_split_sizes_0 = const()[name = string("op_8660_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_8660_axis_0 = const()[name = string("op_8660_axis_0"), val = int32(-1)]; tensor var_8660_cast_fp16_0, tensor var_8660_cast_fp16_1 = split(axis = var_8660_axis_0, split_sizes = var_8660_split_sizes_0, x = normed_409_cast_fp16)[name = string("op_8660_cast_fp16")]; tensor const_245_to_fp16 = const()[name = string("const_245_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2269396928)))]; tensor var_8663_cast_fp16 = mul(x = var_8660_cast_fp16_0, y = const_245_to_fp16)[name = string("op_8663_cast_fp16")]; tensor var_8676 = const()[name = string("op_8676"), val = tensor([0, 2, 1])]; tensor input_353_axes_0 = const()[name = string("input_353_axes_0"), val = tensor([2])]; tensor var_8677 = transpose(perm = var_8676, x = var_8663_cast_fp16)[name = string("transpose_218")]; tensor input_353 = expand_dims(axes = input_353_axes_0, x = var_8677)[name = string("input_353")]; string gate_37_pad_type_0 = const()[name = string("gate_37_pad_type_0"), val = string("valid")]; tensor gate_37_strides_0 = const()[name = string("gate_37_strides_0"), val = tensor([1, 1])]; tensor gate_37_pad_0 = const()[name = string("gate_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_37_dilations_0 = const()[name = string("gate_37_dilations_0"), val = tensor([1, 1])]; int32 gate_37_groups_0 = const()[name = string("gate_37_groups_0"), val = int32(1)]; tensor gate_37 = conv(dilations = gate_37_dilations_0, groups = gate_37_groups_0, pad = gate_37_pad_0, pad_type = gate_37_pad_type_0, strides = gate_37_strides_0, weight = layers_9_mlp_gate_proj_weight_palettized, x = input_353)[name = string("gate_37")]; string up_19_pad_type_0 = const()[name = string("up_19_pad_type_0"), val = string("valid")]; tensor up_19_strides_0 = const()[name = string("up_19_strides_0"), val = tensor([1, 1])]; tensor up_19_pad_0 = const()[name = string("up_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_19_dilations_0 = const()[name = string("up_19_dilations_0"), val = tensor([1, 1])]; int32 up_19_groups_0 = const()[name = string("up_19_groups_0"), val = int32(1)]; tensor up_19 = conv(dilations = up_19_dilations_0, groups = up_19_groups_0, pad = up_19_pad_0, pad_type = up_19_pad_type_0, strides = up_19_strides_0, weight = layers_9_mlp_up_proj_weight_palettized, x = input_353)[name = string("up_19")]; string gate_39_mode_0 = const()[name = string("gate_39_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_39 = gelu(mode = gate_39_mode_0, x = gate_37)[name = string("gate_39")]; tensor input_355 = mul(x = gate_39, y = up_19)[name = string("input_355")]; string mlp_out_19_pad_type_0 = const()[name = string("mlp_out_19_pad_type_0"), val = string("valid")]; tensor mlp_out_19_strides_0 = const()[name = string("mlp_out_19_strides_0"), val = tensor([1, 1])]; tensor mlp_out_19_pad_0 = const()[name = string("mlp_out_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_19_dilations_0 = const()[name = string("mlp_out_19_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_19_groups_0 = const()[name = string("mlp_out_19_groups_0"), val = int32(1)]; tensor mlp_out_19 = conv(dilations = mlp_out_19_dilations_0, groups = mlp_out_19_groups_0, pad = mlp_out_19_pad_0, pad_type = mlp_out_19_pad_type_0, strides = mlp_out_19_strides_0, weight = layers_9_mlp_down_proj_weight_palettized, x = input_355)[name = string("mlp_out_19")]; tensor var_8717_axes_0 = const()[name = string("op_8717_axes_0"), val = tensor([2])]; tensor var_8717 = squeeze(axes = var_8717_axes_0, x = mlp_out_19)[name = string("op_8717")]; tensor var_8721 = const()[name = string("op_8721"), val = tensor([0, 2, 1])]; int32 var_8727 = const()[name = string("op_8727"), val = int32(-1)]; fp16 const_246_promoted_to_fp16 = const()[name = string("const_246_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_431 = transpose(perm = var_8721, x = var_8717)[name = string("transpose_217")]; tensor var_8733_cast_fp16 = mul(x = x_431, y = const_246_promoted_to_fp16)[name = string("op_8733_cast_fp16")]; bool input_357_interleave_0 = const()[name = string("input_357_interleave_0"), val = bool(false)]; tensor input_357_cast_fp16 = concat(axis = var_8727, interleave = input_357_interleave_0, values = (x_431, var_8733_cast_fp16))[name = string("input_357_cast_fp16")]; tensor normed_413_axes_0 = const()[name = string("normed_413_axes_0"), val = tensor([-1])]; fp16 var_8725_to_fp16 = const()[name = string("op_8725_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_413_cast_fp16 = layer_norm(axes = normed_413_axes_0, epsilon = var_8725_to_fp16, x = input_357_cast_fp16)[name = string("normed_413_cast_fp16")]; tensor var_8738_split_sizes_0 = const()[name = string("op_8738_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_8738_axis_0 = const()[name = string("op_8738_axis_0"), val = int32(-1)]; tensor var_8738_cast_fp16_0, tensor var_8738_cast_fp16_1 = split(axis = var_8738_axis_0, split_sizes = var_8738_split_sizes_0, x = normed_413_cast_fp16)[name = string("op_8738_cast_fp16")]; tensor const_247_to_fp16 = const()[name = string("const_247_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2269400064)))]; tensor var_8741_cast_fp16 = mul(x = var_8738_cast_fp16_0, y = const_247_to_fp16)[name = string("op_8741_cast_fp16")]; tensor hidden_states_121_cast_fp16 = add(x = x_427_cast_fp16, y = var_8741_cast_fp16)[name = string("hidden_states_121_cast_fp16")]; tensor per_layer_slice_19_begin_0 = const()[name = string("per_layer_slice_19_begin_0"), val = tensor([0, 0, 2304])]; tensor per_layer_slice_19_end_0 = const()[name = string("per_layer_slice_19_end_0"), val = tensor([1, 1, 2560])]; tensor per_layer_slice_19_end_mask_0 = const()[name = string("per_layer_slice_19_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_19 = slice_by_index(begin = per_layer_slice_19_begin_0, end = per_layer_slice_19_end_0, end_mask = per_layer_slice_19_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_19")]; tensor gated_37 = linear(bias = linear_1_bias_0, weight = layers_9_per_layer_input_gate_weight_palettized, x = hidden_states_121_cast_fp16)[name = string("linear_19")]; string gated_39_mode_0 = const()[name = string("gated_39_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_39 = gelu(mode = gated_39_mode_0, x = gated_37)[name = string("gated_39")]; tensor input_361 = mul(x = gated_39, y = per_layer_slice_19)[name = string("input_361")]; tensor x_435 = linear(bias = linear_2_bias_0, weight = layers_9_per_layer_projection_weight_palettized, x = input_361)[name = string("linear_20")]; int32 var_8778 = const()[name = string("op_8778"), val = int32(-1)]; fp16 const_248_promoted_to_fp16 = const()[name = string("const_248_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8784_cast_fp16 = mul(x = x_435, y = const_248_promoted_to_fp16)[name = string("op_8784_cast_fp16")]; bool input_363_interleave_0 = const()[name = string("input_363_interleave_0"), val = bool(false)]; tensor input_363_cast_fp16 = concat(axis = var_8778, interleave = input_363_interleave_0, values = (x_435, var_8784_cast_fp16))[name = string("input_363_cast_fp16")]; tensor normed_417_axes_0 = const()[name = string("normed_417_axes_0"), val = tensor([-1])]; fp16 var_8776_to_fp16 = const()[name = string("op_8776_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_417_cast_fp16 = layer_norm(axes = normed_417_axes_0, epsilon = var_8776_to_fp16, x = input_363_cast_fp16)[name = string("normed_417_cast_fp16")]; tensor var_8789_split_sizes_0 = const()[name = string("op_8789_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_8789_axis_0 = const()[name = string("op_8789_axis_0"), val = int32(-1)]; tensor var_8789_cast_fp16_0, tensor var_8789_cast_fp16_1 = split(axis = var_8789_axis_0, split_sizes = var_8789_split_sizes_0, x = normed_417_cast_fp16)[name = string("op_8789_cast_fp16")]; tensor const_249_to_fp16 = const()[name = string("const_249_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2269403200)))]; tensor var_8792_cast_fp16 = mul(x = var_8789_cast_fp16_0, y = const_249_to_fp16)[name = string("op_8792_cast_fp16")]; tensor hidden_states_125_cast_fp16 = add(x = hidden_states_121_cast_fp16, y = var_8792_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; tensor layers_9_layer_scalar_to_fp16 = const()[name = string("layers_9_layer_scalar_to_fp16"), val = tensor([0x1.dcp-2])]; tensor x_439_cast_fp16 = mul(x = hidden_states_125_cast_fp16, y = layers_9_layer_scalar_to_fp16)[name = string("x_439_cast_fp16")]; int32 var_8800 = const()[name = string("op_8800"), val = int32(-1)]; fp16 const_250_promoted_to_fp16 = const()[name = string("const_250_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8806_cast_fp16 = mul(x = x_439_cast_fp16, y = const_250_promoted_to_fp16)[name = string("op_8806_cast_fp16")]; bool input_365_interleave_0 = const()[name = string("input_365_interleave_0"), val = bool(false)]; tensor input_365_cast_fp16 = concat(axis = var_8800, interleave = input_365_interleave_0, values = (x_439_cast_fp16, var_8806_cast_fp16))[name = string("input_365_cast_fp16")]; tensor normed_421_axes_0 = const()[name = string("normed_421_axes_0"), val = tensor([-1])]; fp16 var_8798_to_fp16 = const()[name = string("op_8798_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_421_cast_fp16 = layer_norm(axes = normed_421_axes_0, epsilon = var_8798_to_fp16, x = input_365_cast_fp16)[name = string("normed_421_cast_fp16")]; tensor var_8811_split_sizes_0 = const()[name = string("op_8811_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_8811_axis_0 = const()[name = string("op_8811_axis_0"), val = int32(-1)]; tensor var_8811_cast_fp16_0, tensor var_8811_cast_fp16_1 = split(axis = var_8811_axis_0, split_sizes = var_8811_split_sizes_0, x = normed_421_cast_fp16)[name = string("op_8811_cast_fp16")]; tensor const_251_to_fp16 = const()[name = string("const_251_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2269406336)))]; tensor var_8814_cast_fp16 = mul(x = var_8811_cast_fp16_0, y = const_251_to_fp16)[name = string("op_8814_cast_fp16")]; tensor var_8822 = const()[name = string("op_8822"), val = tensor([0, 2, 1])]; tensor var_8825_axes_0 = const()[name = string("op_8825_axes_0"), val = tensor([2])]; tensor var_8823_cast_fp16 = transpose(perm = var_8822, x = var_8814_cast_fp16)[name = string("transpose_216")]; tensor var_8825_cast_fp16 = expand_dims(axes = var_8825_axes_0, x = var_8823_cast_fp16)[name = string("op_8825_cast_fp16")]; string var_8841_pad_type_0 = const()[name = string("op_8841_pad_type_0"), val = string("valid")]; tensor var_8841_strides_0 = const()[name = string("op_8841_strides_0"), val = tensor([1, 1])]; tensor var_8841_pad_0 = const()[name = string("op_8841_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8841_dilations_0 = const()[name = string("op_8841_dilations_0"), val = tensor([1, 1])]; int32 var_8841_groups_0 = const()[name = string("op_8841_groups_0"), val = int32(1)]; tensor var_8841 = conv(dilations = var_8841_dilations_0, groups = var_8841_groups_0, pad = var_8841_pad_0, pad_type = var_8841_pad_type_0, strides = var_8841_strides_0, weight = layers_10_self_attn_q_proj_weight_palettized, x = var_8825_cast_fp16)[name = string("op_8841")]; tensor var_8846 = const()[name = string("op_8846"), val = tensor([1, 8, 256, 1])]; tensor var_8847 = reshape(shape = var_8846, x = var_8841)[name = string("op_8847")]; tensor var_8852 = const()[name = string("op_8852"), val = tensor([0, 1, 3, 2])]; tensor var_8862 = const()[name = string("op_8862"), val = tensor([1, 8, 256])]; tensor var_8853 = transpose(perm = var_8852, x = var_8847)[name = string("transpose_215")]; tensor x_443 = reshape(shape = var_8862, x = var_8853)[name = string("x_443")]; int32 var_8868 = const()[name = string("op_8868"), val = int32(-1)]; fp16 const_252_promoted_to_fp16 = const()[name = string("const_252_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8874_cast_fp16 = mul(x = x_443, y = const_252_promoted_to_fp16)[name = string("op_8874_cast_fp16")]; bool input_369_interleave_0 = const()[name = string("input_369_interleave_0"), val = bool(false)]; tensor input_369_cast_fp16 = concat(axis = var_8868, interleave = input_369_interleave_0, values = (x_443, var_8874_cast_fp16))[name = string("input_369_cast_fp16")]; tensor normed_425_axes_0 = const()[name = string("normed_425_axes_0"), val = tensor([-1])]; fp16 var_8866_to_fp16 = const()[name = string("op_8866_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_425_cast_fp16 = layer_norm(axes = normed_425_axes_0, epsilon = var_8866_to_fp16, x = input_369_cast_fp16)[name = string("normed_425_cast_fp16")]; tensor var_8879_split_sizes_0 = const()[name = string("op_8879_split_sizes_0"), val = tensor([256, 256])]; int32 var_8879_axis_0 = const()[name = string("op_8879_axis_0"), val = int32(-1)]; tensor var_8879_cast_fp16_0, tensor var_8879_cast_fp16_1 = split(axis = var_8879_axis_0, split_sizes = var_8879_split_sizes_0, x = normed_425_cast_fp16)[name = string("op_8879_cast_fp16")]; tensor var_8882_cast_fp16 = mul(x = var_8879_cast_fp16_0, y = const_131_to_fp16)[name = string("op_8882_cast_fp16")]; tensor var_8888 = const()[name = string("op_8888"), val = tensor([1, 8, 1, 256])]; tensor q_83 = reshape(shape = var_8888, x = var_8882_cast_fp16)[name = string("q_83")]; tensor var_8890 = mul(x = q_83, y = cos_1)[name = string("op_8890")]; tensor var_8891_split_sizes_0 = const()[name = string("op_8891_split_sizes_0"), val = tensor([128, 128])]; int32 var_8891_axis_0 = const()[name = string("op_8891_axis_0"), val = int32(-1)]; tensor var_8891_0, tensor var_8891_1 = split(axis = var_8891_axis_0, split_sizes = var_8891_split_sizes_0, x = q_83)[name = string("op_8891")]; fp16 const_254_promoted = const()[name = string("const_254_promoted"), val = fp16(-0x1p+0)]; tensor var_8893 = mul(x = var_8891_1, y = const_254_promoted)[name = string("op_8893")]; int32 var_8895 = const()[name = string("op_8895"), val = int32(-1)]; bool var_8896_interleave_0 = const()[name = string("op_8896_interleave_0"), val = bool(false)]; tensor var_8896 = concat(axis = var_8895, interleave = var_8896_interleave_0, values = (var_8893, var_8891_0))[name = string("op_8896")]; tensor var_8897 = mul(x = var_8896, y = sin_1)[name = string("op_8897")]; tensor q_87 = add(x = var_8890, y = var_8897)[name = string("q_87")]; string var_8910_pad_type_0 = const()[name = string("op_8910_pad_type_0"), val = string("valid")]; tensor var_8910_strides_0 = const()[name = string("op_8910_strides_0"), val = tensor([1, 1])]; tensor var_8910_pad_0 = const()[name = string("op_8910_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8910_dilations_0 = const()[name = string("op_8910_dilations_0"), val = tensor([1, 1])]; int32 var_8910_groups_0 = const()[name = string("op_8910_groups_0"), val = int32(1)]; tensor var_8910 = conv(dilations = var_8910_dilations_0, groups = var_8910_groups_0, pad = var_8910_pad_0, pad_type = var_8910_pad_type_0, strides = var_8910_strides_0, weight = layers_10_self_attn_k_proj_weight_palettized, x = var_8825_cast_fp16)[name = string("op_8910")]; tensor var_8915 = const()[name = string("op_8915"), val = tensor([1, 1, 256, 1])]; tensor var_8916 = reshape(shape = var_8915, x = var_8910)[name = string("op_8916")]; tensor var_8921 = const()[name = string("op_8921"), val = tensor([0, 1, 3, 2])]; string var_8938_pad_type_0 = const()[name = string("op_8938_pad_type_0"), val = string("valid")]; tensor var_8938_strides_0 = const()[name = string("op_8938_strides_0"), val = tensor([1, 1])]; tensor var_8938_pad_0 = const()[name = string("op_8938_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8938_dilations_0 = const()[name = string("op_8938_dilations_0"), val = tensor([1, 1])]; int32 var_8938_groups_0 = const()[name = string("op_8938_groups_0"), val = int32(1)]; tensor var_8938 = conv(dilations = var_8938_dilations_0, groups = var_8938_groups_0, pad = var_8938_pad_0, pad_type = var_8938_pad_type_0, strides = var_8938_strides_0, weight = layers_10_self_attn_v_proj_weight_palettized, x = var_8825_cast_fp16)[name = string("op_8938")]; tensor var_8943 = const()[name = string("op_8943"), val = tensor([1, 1, 256, 1])]; tensor var_8944 = reshape(shape = var_8943, x = var_8938)[name = string("op_8944")]; tensor var_8949 = const()[name = string("op_8949"), val = tensor([0, 1, 3, 2])]; tensor var_8959 = const()[name = string("op_8959"), val = tensor([1, 1, 256])]; tensor var_8922 = transpose(perm = var_8921, x = var_8916)[name = string("transpose_214")]; tensor x_447 = reshape(shape = var_8959, x = var_8922)[name = string("x_447")]; int32 var_8965 = const()[name = string("op_8965"), val = int32(-1)]; fp16 const_255_promoted_to_fp16 = const()[name = string("const_255_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8971_cast_fp16 = mul(x = x_447, y = const_255_promoted_to_fp16)[name = string("op_8971_cast_fp16")]; bool input_371_interleave_0 = const()[name = string("input_371_interleave_0"), val = bool(false)]; tensor input_371_cast_fp16 = concat(axis = var_8965, interleave = input_371_interleave_0, values = (x_447, var_8971_cast_fp16))[name = string("input_371_cast_fp16")]; tensor normed_429_axes_0 = const()[name = string("normed_429_axes_0"), val = tensor([-1])]; fp16 var_8963_to_fp16 = const()[name = string("op_8963_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_429_cast_fp16 = layer_norm(axes = normed_429_axes_0, epsilon = var_8963_to_fp16, x = input_371_cast_fp16)[name = string("normed_429_cast_fp16")]; tensor var_8976_split_sizes_0 = const()[name = string("op_8976_split_sizes_0"), val = tensor([256, 256])]; int32 var_8976_axis_0 = const()[name = string("op_8976_axis_0"), val = int32(-1)]; tensor var_8976_cast_fp16_0, tensor var_8976_cast_fp16_1 = split(axis = var_8976_axis_0, split_sizes = var_8976_split_sizes_0, x = normed_429_cast_fp16)[name = string("op_8976_cast_fp16")]; tensor const_256_to_fp16 = const()[name = string("const_256_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2269409472)))]; tensor var_8979_cast_fp16 = mul(x = var_8976_cast_fp16_0, y = const_256_to_fp16)[name = string("op_8979_cast_fp16")]; tensor var_8985 = const()[name = string("op_8985"), val = tensor([1, 1, 1, 256])]; tensor q_85 = reshape(shape = var_8985, x = var_8979_cast_fp16)[name = string("q_85")]; fp16 var_8992_promoted_to_fp16 = const()[name = string("op_8992_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8950 = transpose(perm = var_8949, x = var_8944)[name = string("transpose_213")]; tensor var_8993_cast_fp16 = pow(x = var_8950, y = var_8992_promoted_to_fp16)[name = string("op_8993_cast_fp16")]; tensor var_8998_axes_0 = const()[name = string("op_8998_axes_0"), val = tensor([-1])]; bool var_8998_keep_dims_0 = const()[name = string("op_8998_keep_dims_0"), val = bool(true)]; tensor var_8998_cast_fp16 = reduce_mean(axes = var_8998_axes_0, keep_dims = var_8998_keep_dims_0, x = var_8993_cast_fp16)[name = string("op_8998_cast_fp16")]; fp16 var_9000_to_fp16 = const()[name = string("op_9000_to_fp16"), val = fp16(0x1.1p-20)]; tensor mean_sq_21_cast_fp16 = add(x = var_8998_cast_fp16, y = var_9000_to_fp16)[name = string("mean_sq_21_cast_fp16")]; fp16 var_9007_to_fp16 = const()[name = string("op_9007_to_fp16"), val = fp16(-0x1p-1)]; tensor var_9008_cast_fp16 = pow(x = mean_sq_21_cast_fp16, y = var_9007_to_fp16)[name = string("op_9008_cast_fp16")]; tensor var_9009_cast_fp16 = mul(x = var_8950, y = var_9008_cast_fp16)[name = string("op_9009_cast_fp16")]; tensor var_9015 = mul(x = q_85, y = cos_1)[name = string("op_9015")]; tensor var_9016_split_sizes_0 = const()[name = string("op_9016_split_sizes_0"), val = tensor([128, 128])]; int32 var_9016_axis_0 = const()[name = string("op_9016_axis_0"), val = int32(-1)]; tensor var_9016_0, tensor var_9016_1 = split(axis = var_9016_axis_0, split_sizes = var_9016_split_sizes_0, x = q_85)[name = string("op_9016")]; fp16 const_257_promoted = const()[name = string("const_257_promoted"), val = fp16(-0x1p+0)]; tensor var_9018 = mul(x = var_9016_1, y = const_257_promoted)[name = string("op_9018")]; int32 var_9020 = const()[name = string("op_9020"), val = int32(-1)]; bool var_9021_interleave_0 = const()[name = string("op_9021_interleave_0"), val = bool(false)]; tensor var_9021 = concat(axis = var_9020, interleave = var_9021_interleave_0, values = (var_9018, var_9016_0))[name = string("op_9021")]; tensor var_9022 = mul(x = var_9021, y = sin_1)[name = string("op_9022")]; tensor input_373 = add(x = var_9015, y = var_9022)[name = string("input_373")]; tensor var_9027_begin_0 = const()[name = string("op_9027_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_9027_end_0 = const()[name = string("op_9027_end_0"), val = tensor([11, 1, 512, 512])]; tensor var_9027_end_mask_0 = const()[name = string("op_9027_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9027_squeeze_mask_0 = const()[name = string("op_9027_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_9027_cast_fp16 = slice_by_index(begin = var_9027_begin_0, end = var_9027_end_0, end_mask = var_9027_end_mask_0, squeeze_mask = var_9027_squeeze_mask_0, x = coreml_update_state_49)[name = string("op_9027_cast_fp16")]; tensor K_cache_21_axes_0 = const()[name = string("K_cache_21_axes_0"), val = tensor([0])]; tensor K_cache_21_cast_fp16 = expand_dims(axes = K_cache_21_axes_0, x = var_9027_cast_fp16)[name = string("K_cache_21_cast_fp16")]; tensor var_9032_begin_0 = const()[name = string("op_9032_begin_0"), val = tensor([45, 0, 0, 0])]; tensor var_9032_end_0 = const()[name = string("op_9032_end_0"), val = tensor([46, 1, 512, 512])]; tensor var_9032_end_mask_0 = const()[name = string("op_9032_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9032_squeeze_mask_0 = const()[name = string("op_9032_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_9032_cast_fp16 = slice_by_index(begin = var_9032_begin_0, end = var_9032_end_0, end_mask = var_9032_end_mask_0, squeeze_mask = var_9032_squeeze_mask_0, x = coreml_update_state_49)[name = string("op_9032_cast_fp16")]; tensor V_cache_21_axes_0 = const()[name = string("V_cache_21_axes_0"), val = tensor([0])]; tensor V_cache_21_cast_fp16 = expand_dims(axes = V_cache_21_axes_0, x = var_9032_cast_fp16)[name = string("V_cache_21_cast_fp16")]; tensor k_padded_17_pad_0 = const()[name = string("k_padded_17_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string k_padded_17_mode_0 = const()[name = string("k_padded_17_mode_0"), val = string("constant")]; fp16 const_258_to_fp16 = const()[name = string("const_258_to_fp16"), val = fp16(0x0p+0)]; tensor k_padded_17_cast_fp16 = pad(constant_val = const_258_to_fp16, mode = k_padded_17_mode_0, pad = k_padded_17_pad_0, x = input_373)[name = string("k_padded_17_cast_fp16")]; tensor v_padded_17_pad_0 = const()[name = string("v_padded_17_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string v_padded_17_mode_0 = const()[name = string("v_padded_17_mode_0"), val = string("constant")]; fp16 const_259_to_fp16 = const()[name = string("const_259_to_fp16"), val = fp16(0x0p+0)]; tensor v_padded_17_cast_fp16 = pad(constant_val = const_259_to_fp16, mode = v_padded_17_mode_0, pad = v_padded_17_pad_0, x = var_9009_cast_fp16)[name = string("v_padded_17_cast_fp16")]; tensor var_9050_cast_fp16 = mul(x = K_cache_21_cast_fp16, y = var_3515_cast_fp16)[name = string("op_9050_cast_fp16")]; tensor var_9051_reps_0 = const()[name = string("op_9051_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_9051_cast_fp16 = tile(reps = var_9051_reps_0, x = k_padded_17_cast_fp16)[name = string("op_9051_cast_fp16")]; tensor var_9052_cast_fp16 = mul(x = var_9051_cast_fp16, y = update_mask)[name = string("op_9052_cast_fp16")]; tensor K_new_21_cast_fp16 = add(x = var_9050_cast_fp16, y = var_9052_cast_fp16)[name = string("K_new_21_cast_fp16")]; tensor var_9058_cast_fp16 = mul(x = V_cache_21_cast_fp16, y = var_3515_cast_fp16)[name = string("op_9058_cast_fp16")]; tensor var_9059_reps_0 = const()[name = string("op_9059_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_9059_cast_fp16 = tile(reps = var_9059_reps_0, x = v_padded_17_cast_fp16)[name = string("op_9059_cast_fp16")]; tensor var_9060_cast_fp16 = mul(x = var_9059_cast_fp16, y = update_mask)[name = string("op_9060_cast_fp16")]; tensor V_new_21_cast_fp16 = add(x = var_9058_cast_fp16, y = var_9060_cast_fp16)[name = string("V_new_21_cast_fp16")]; tensor var_9064_axes_0 = const()[name = string("op_9064_axes_0"), val = tensor([0])]; tensor var_9064_cast_fp16 = squeeze(axes = var_9064_axes_0, x = K_new_21_cast_fp16)[name = string("op_9064_cast_fp16")]; tensor concat_80 = const()[name = string("concat_80"), val = tensor([10, 0, 0, 0])]; tensor concat_81 = const()[name = string("concat_81"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_21_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_21_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_21_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_80, begin_mask = kv_cache_0_internal_tensor_assign_21_begin_mask_0, end = concat_81, end_mask = kv_cache_0_internal_tensor_assign_21_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_21_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_21_stride_0, update = var_9064_cast_fp16, x = coreml_update_state_49)[name = string("kv_cache_0_internal_tensor_assign_21_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_21_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_50_write_state")]; tensor coreml_update_state_50 = read_state(input = kv_cache_0)[name = string("coreml_update_state_50")]; tensor var_9071_axes_0 = const()[name = string("op_9071_axes_0"), val = tensor([0])]; tensor var_9071_cast_fp16 = squeeze(axes = var_9071_axes_0, x = V_new_21_cast_fp16)[name = string("op_9071_cast_fp16")]; tensor concat_82 = const()[name = string("concat_82"), val = tensor([45, 0, 0, 0])]; tensor concat_83 = const()[name = string("concat_83"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_22_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_22_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_22_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_82, begin_mask = kv_cache_0_internal_tensor_assign_22_begin_mask_0, end = concat_83, end_mask = kv_cache_0_internal_tensor_assign_22_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_22_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_22_stride_0, update = var_9071_cast_fp16, x = coreml_update_state_50)[name = string("kv_cache_0_internal_tensor_assign_22_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_22_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_51_write_state")]; tensor coreml_update_state_51 = read_state(input = kv_cache_0)[name = string("coreml_update_state_51")]; tensor K_for_attn_21_begin_0 = const()[name = string("K_for_attn_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor K_for_attn_21_end_0 = const()[name = string("K_for_attn_21_end_0"), val = tensor([1, 1, 512, 256])]; tensor K_for_attn_21_end_mask_0 = const()[name = string("K_for_attn_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor K_for_attn_21_cast_fp16 = slice_by_index(begin = K_for_attn_21_begin_0, end = K_for_attn_21_end_0, end_mask = K_for_attn_21_end_mask_0, x = K_new_21_cast_fp16)[name = string("K_for_attn_21_cast_fp16")]; tensor V_for_attn_21_begin_0 = const()[name = string("V_for_attn_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor V_for_attn_21_end_0 = const()[name = string("V_for_attn_21_end_0"), val = tensor([1, 1, 512, 256])]; tensor V_for_attn_21_end_mask_0 = const()[name = string("V_for_attn_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor V_for_attn_21_cast_fp16 = slice_by_index(begin = V_for_attn_21_begin_0, end = V_for_attn_21_end_0, end_mask = V_for_attn_21_end_mask_0, x = V_new_21_cast_fp16)[name = string("V_for_attn_21_cast_fp16")]; tensor transpose_40_perm_0 = const()[name = string("transpose_40_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_20_reps_0 = const()[name = string("tile_20_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_40_cast_fp16 = transpose(perm = transpose_40_perm_0, x = K_for_attn_21_cast_fp16)[name = string("transpose_212")]; tensor tile_20_cast_fp16 = tile(reps = tile_20_reps_0, x = transpose_40_cast_fp16)[name = string("tile_20_cast_fp16")]; tensor concat_84 = const()[name = string("concat_84"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_40_cast_fp16 = reshape(shape = concat_84, x = tile_20_cast_fp16)[name = string("reshape_40_cast_fp16")]; tensor transpose_41_perm_0 = const()[name = string("transpose_41_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_85 = const()[name = string("concat_85"), val = tensor([-1, 1, 512, 256])]; tensor transpose_41_cast_fp16 = transpose(perm = transpose_41_perm_0, x = reshape_40_cast_fp16)[name = string("transpose_211")]; tensor reshape_41_cast_fp16 = reshape(shape = concat_85, x = transpose_41_cast_fp16)[name = string("reshape_41_cast_fp16")]; tensor transpose_150_perm_0 = const()[name = string("transpose_150_perm_0"), val = tensor([1, 0, -1, -2])]; tensor transpose_42_perm_0 = const()[name = string("transpose_42_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_21_reps_0 = const()[name = string("tile_21_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_42_cast_fp16 = transpose(perm = transpose_42_perm_0, x = V_for_attn_21_cast_fp16)[name = string("transpose_210")]; tensor tile_21_cast_fp16 = tile(reps = tile_21_reps_0, x = transpose_42_cast_fp16)[name = string("tile_21_cast_fp16")]; tensor concat_86 = const()[name = string("concat_86"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_42_cast_fp16 = reshape(shape = concat_86, x = tile_21_cast_fp16)[name = string("reshape_42_cast_fp16")]; tensor transpose_43_perm_0 = const()[name = string("transpose_43_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_87 = const()[name = string("concat_87"), val = tensor([-1, 1, 512, 256])]; tensor transpose_43_cast_fp16 = transpose(perm = transpose_43_perm_0, x = reshape_42_cast_fp16)[name = string("transpose_209")]; tensor reshape_43_cast_fp16 = reshape(shape = concat_87, x = transpose_43_cast_fp16)[name = string("reshape_43_cast_fp16")]; tensor V_expanded_21_perm_0 = const()[name = string("V_expanded_21_perm_0"), val = tensor([1, 0, -2, -1])]; bool var_9108_transpose_x_0 = const()[name = string("op_9108_transpose_x_0"), val = bool(false)]; bool var_9108_transpose_y_0 = const()[name = string("op_9108_transpose_y_0"), val = bool(false)]; tensor transpose_150_cast_fp16 = transpose(perm = transpose_150_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_208")]; tensor var_9108_cast_fp16 = matmul(transpose_x = var_9108_transpose_x_0, transpose_y = var_9108_transpose_y_0, x = q_87, y = transpose_150_cast_fp16)[name = string("op_9108_cast_fp16")]; tensor attn_weights_63_cast_fp16 = add(x = var_9108_cast_fp16, y = causal_mask)[name = string("attn_weights_63_cast_fp16")]; int32 var_9118 = const()[name = string("op_9118"), val = int32(-1)]; tensor var_9120_cast_fp16 = softmax(axis = var_9118, x = attn_weights_63_cast_fp16)[name = string("op_9120_cast_fp16")]; bool var_9136_transpose_x_0 = const()[name = string("op_9136_transpose_x_0"), val = bool(false)]; bool var_9136_transpose_y_0 = const()[name = string("op_9136_transpose_y_0"), val = bool(false)]; tensor V_expanded_21_cast_fp16 = transpose(perm = V_expanded_21_perm_0, x = reshape_43_cast_fp16)[name = string("transpose_207")]; tensor var_9136_cast_fp16 = matmul(transpose_x = var_9136_transpose_x_0, transpose_y = var_9136_transpose_y_0, x = var_9120_cast_fp16, y = V_expanded_21_cast_fp16)[name = string("op_9136_cast_fp16")]; tensor var_9146 = const()[name = string("op_9146"), val = tensor([0, 2, 1, 3])]; tensor var_9153 = const()[name = string("op_9153"), val = tensor([1, 1, -1])]; tensor var_9147 = transpose(perm = var_9146, x = var_9136_cast_fp16)[name = string("transpose_206")]; tensor attn_output_63 = reshape(shape = var_9153, x = var_9147)[name = string("attn_output_63")]; tensor var_9158 = const()[name = string("op_9158"), val = tensor([0, 2, 1])]; tensor squeeze_10_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2269410048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2270982976))))[name = string("squeeze_10_palettized")]; string var_9174_pad_type_0 = const()[name = string("op_9174_pad_type_0"), val = string("valid")]; int32 var_9174_groups_0 = const()[name = string("op_9174_groups_0"), val = int32(1)]; tensor var_9174_strides_0 = const()[name = string("op_9174_strides_0"), val = tensor([1])]; tensor var_9174_pad_0 = const()[name = string("op_9174_pad_0"), val = tensor([0, 0])]; tensor var_9174_dilations_0 = const()[name = string("op_9174_dilations_0"), val = tensor([1])]; tensor var_9159 = transpose(perm = var_9158, x = attn_output_63)[name = string("transpose_205")]; tensor var_9174 = conv(dilations = var_9174_dilations_0, groups = var_9174_groups_0, pad = var_9174_pad_0, pad_type = var_9174_pad_type_0, strides = var_9174_strides_0, weight = squeeze_10_palettized, x = var_9159)[name = string("op_9174")]; tensor var_9178 = const()[name = string("op_9178"), val = tensor([0, 2, 1])]; int32 var_9184 = const()[name = string("op_9184"), val = int32(-1)]; fp16 const_260_promoted_to_fp16 = const()[name = string("const_260_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_453 = transpose(perm = var_9178, x = var_9174)[name = string("transpose_204")]; tensor var_9190_cast_fp16 = mul(x = x_453, y = const_260_promoted_to_fp16)[name = string("op_9190_cast_fp16")]; bool input_379_interleave_0 = const()[name = string("input_379_interleave_0"), val = bool(false)]; tensor input_379_cast_fp16 = concat(axis = var_9184, interleave = input_379_interleave_0, values = (x_453, var_9190_cast_fp16))[name = string("input_379_cast_fp16")]; tensor normed_433_axes_0 = const()[name = string("normed_433_axes_0"), val = tensor([-1])]; fp16 var_9182_to_fp16 = const()[name = string("op_9182_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_433_cast_fp16 = layer_norm(axes = normed_433_axes_0, epsilon = var_9182_to_fp16, x = input_379_cast_fp16)[name = string("normed_433_cast_fp16")]; tensor var_9195_split_sizes_0 = const()[name = string("op_9195_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_9195_axis_0 = const()[name = string("op_9195_axis_0"), val = int32(-1)]; tensor var_9195_cast_fp16_0, tensor var_9195_cast_fp16_1 = split(axis = var_9195_axis_0, split_sizes = var_9195_split_sizes_0, x = normed_433_cast_fp16)[name = string("op_9195_cast_fp16")]; tensor const_261_to_fp16 = const()[name = string("const_261_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2270984576)))]; tensor var_9198_cast_fp16 = mul(x = var_9195_cast_fp16_0, y = const_261_to_fp16)[name = string("op_9198_cast_fp16")]; tensor x_457_cast_fp16 = add(x = x_439_cast_fp16, y = var_9198_cast_fp16)[name = string("x_457_cast_fp16")]; int32 var_9205 = const()[name = string("op_9205"), val = int32(-1)]; fp16 const_262_promoted_to_fp16 = const()[name = string("const_262_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9211_cast_fp16 = mul(x = x_457_cast_fp16, y = const_262_promoted_to_fp16)[name = string("op_9211_cast_fp16")]; bool input_381_interleave_0 = const()[name = string("input_381_interleave_0"), val = bool(false)]; tensor input_381_cast_fp16 = concat(axis = var_9205, interleave = input_381_interleave_0, values = (x_457_cast_fp16, var_9211_cast_fp16))[name = string("input_381_cast_fp16")]; tensor normed_437_axes_0 = const()[name = string("normed_437_axes_0"), val = tensor([-1])]; fp16 var_9203_to_fp16 = const()[name = string("op_9203_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_437_cast_fp16 = layer_norm(axes = normed_437_axes_0, epsilon = var_9203_to_fp16, x = input_381_cast_fp16)[name = string("normed_437_cast_fp16")]; tensor var_9216_split_sizes_0 = const()[name = string("op_9216_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_9216_axis_0 = const()[name = string("op_9216_axis_0"), val = int32(-1)]; tensor var_9216_cast_fp16_0, tensor var_9216_cast_fp16_1 = split(axis = var_9216_axis_0, split_sizes = var_9216_split_sizes_0, x = normed_437_cast_fp16)[name = string("op_9216_cast_fp16")]; tensor const_263_to_fp16 = const()[name = string("const_263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2270987712)))]; tensor var_9219_cast_fp16 = mul(x = var_9216_cast_fp16_0, y = const_263_to_fp16)[name = string("op_9219_cast_fp16")]; tensor var_9232 = const()[name = string("op_9232"), val = tensor([0, 2, 1])]; tensor input_383_axes_0 = const()[name = string("input_383_axes_0"), val = tensor([2])]; tensor var_9233 = transpose(perm = var_9232, x = var_9219_cast_fp16)[name = string("transpose_203")]; tensor input_383 = expand_dims(axes = input_383_axes_0, x = var_9233)[name = string("input_383")]; string gate_41_pad_type_0 = const()[name = string("gate_41_pad_type_0"), val = string("valid")]; tensor gate_41_strides_0 = const()[name = string("gate_41_strides_0"), val = tensor([1, 1])]; tensor gate_41_pad_0 = const()[name = string("gate_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_41_dilations_0 = const()[name = string("gate_41_dilations_0"), val = tensor([1, 1])]; int32 gate_41_groups_0 = const()[name = string("gate_41_groups_0"), val = int32(1)]; tensor gate_41 = conv(dilations = gate_41_dilations_0, groups = gate_41_groups_0, pad = gate_41_pad_0, pad_type = gate_41_pad_type_0, strides = gate_41_strides_0, weight = layers_10_mlp_gate_proj_weight_palettized, x = input_383)[name = string("gate_41")]; string up_21_pad_type_0 = const()[name = string("up_21_pad_type_0"), val = string("valid")]; tensor up_21_strides_0 = const()[name = string("up_21_strides_0"), val = tensor([1, 1])]; tensor up_21_pad_0 = const()[name = string("up_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_21_dilations_0 = const()[name = string("up_21_dilations_0"), val = tensor([1, 1])]; int32 up_21_groups_0 = const()[name = string("up_21_groups_0"), val = int32(1)]; tensor up_21 = conv(dilations = up_21_dilations_0, groups = up_21_groups_0, pad = up_21_pad_0, pad_type = up_21_pad_type_0, strides = up_21_strides_0, weight = layers_10_mlp_up_proj_weight_palettized, x = input_383)[name = string("up_21")]; string gate_43_mode_0 = const()[name = string("gate_43_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_43 = gelu(mode = gate_43_mode_0, x = gate_41)[name = string("gate_43")]; tensor input_385 = mul(x = gate_43, y = up_21)[name = string("input_385")]; string mlp_out_21_pad_type_0 = const()[name = string("mlp_out_21_pad_type_0"), val = string("valid")]; tensor mlp_out_21_strides_0 = const()[name = string("mlp_out_21_strides_0"), val = tensor([1, 1])]; tensor mlp_out_21_pad_0 = const()[name = string("mlp_out_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_21_dilations_0 = const()[name = string("mlp_out_21_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_21_groups_0 = const()[name = string("mlp_out_21_groups_0"), val = int32(1)]; tensor mlp_out_21 = conv(dilations = mlp_out_21_dilations_0, groups = mlp_out_21_groups_0, pad = mlp_out_21_pad_0, pad_type = mlp_out_21_pad_type_0, strides = mlp_out_21_strides_0, weight = layers_10_mlp_down_proj_weight_palettized, x = input_385)[name = string("mlp_out_21")]; tensor var_9273_axes_0 = const()[name = string("op_9273_axes_0"), val = tensor([2])]; tensor var_9273 = squeeze(axes = var_9273_axes_0, x = mlp_out_21)[name = string("op_9273")]; tensor var_9277 = const()[name = string("op_9277"), val = tensor([0, 2, 1])]; int32 var_9283 = const()[name = string("op_9283"), val = int32(-1)]; fp16 const_264_promoted_to_fp16 = const()[name = string("const_264_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_461 = transpose(perm = var_9277, x = var_9273)[name = string("transpose_202")]; tensor var_9289_cast_fp16 = mul(x = x_461, y = const_264_promoted_to_fp16)[name = string("op_9289_cast_fp16")]; bool input_387_interleave_0 = const()[name = string("input_387_interleave_0"), val = bool(false)]; tensor input_387_cast_fp16 = concat(axis = var_9283, interleave = input_387_interleave_0, values = (x_461, var_9289_cast_fp16))[name = string("input_387_cast_fp16")]; tensor normed_441_axes_0 = const()[name = string("normed_441_axes_0"), val = tensor([-1])]; fp16 var_9281_to_fp16 = const()[name = string("op_9281_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_441_cast_fp16 = layer_norm(axes = normed_441_axes_0, epsilon = var_9281_to_fp16, x = input_387_cast_fp16)[name = string("normed_441_cast_fp16")]; tensor var_9294_split_sizes_0 = const()[name = string("op_9294_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_9294_axis_0 = const()[name = string("op_9294_axis_0"), val = int32(-1)]; tensor var_9294_cast_fp16_0, tensor var_9294_cast_fp16_1 = split(axis = var_9294_axis_0, split_sizes = var_9294_split_sizes_0, x = normed_441_cast_fp16)[name = string("op_9294_cast_fp16")]; tensor const_265_to_fp16 = const()[name = string("const_265_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2270990848)))]; tensor var_9297_cast_fp16 = mul(x = var_9294_cast_fp16_0, y = const_265_to_fp16)[name = string("op_9297_cast_fp16")]; tensor hidden_states_133_cast_fp16 = add(x = x_457_cast_fp16, y = var_9297_cast_fp16)[name = string("hidden_states_133_cast_fp16")]; tensor per_layer_slice_21_begin_0 = const()[name = string("per_layer_slice_21_begin_0"), val = tensor([0, 0, 2560])]; tensor per_layer_slice_21_end_0 = const()[name = string("per_layer_slice_21_end_0"), val = tensor([1, 1, 2816])]; tensor per_layer_slice_21_end_mask_0 = const()[name = string("per_layer_slice_21_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_21 = slice_by_index(begin = per_layer_slice_21_begin_0, end = per_layer_slice_21_end_0, end_mask = per_layer_slice_21_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_21")]; tensor gated_41 = linear(bias = linear_1_bias_0, weight = layers_10_per_layer_input_gate_weight_palettized, x = hidden_states_133_cast_fp16)[name = string("linear_21")]; string gated_43_mode_0 = const()[name = string("gated_43_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_43 = gelu(mode = gated_43_mode_0, x = gated_41)[name = string("gated_43")]; tensor input_391 = mul(x = gated_43, y = per_layer_slice_21)[name = string("input_391")]; tensor x_465 = linear(bias = linear_2_bias_0, weight = layers_10_per_layer_projection_weight_palettized, x = input_391)[name = string("linear_22")]; int32 var_9334 = const()[name = string("op_9334"), val = int32(-1)]; fp16 const_266_promoted_to_fp16 = const()[name = string("const_266_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9340_cast_fp16 = mul(x = x_465, y = const_266_promoted_to_fp16)[name = string("op_9340_cast_fp16")]; bool input_393_interleave_0 = const()[name = string("input_393_interleave_0"), val = bool(false)]; tensor input_393_cast_fp16 = concat(axis = var_9334, interleave = input_393_interleave_0, values = (x_465, var_9340_cast_fp16))[name = string("input_393_cast_fp16")]; tensor normed_445_axes_0 = const()[name = string("normed_445_axes_0"), val = tensor([-1])]; fp16 var_9332_to_fp16 = const()[name = string("op_9332_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_445_cast_fp16 = layer_norm(axes = normed_445_axes_0, epsilon = var_9332_to_fp16, x = input_393_cast_fp16)[name = string("normed_445_cast_fp16")]; tensor var_9345_split_sizes_0 = const()[name = string("op_9345_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_9345_axis_0 = const()[name = string("op_9345_axis_0"), val = int32(-1)]; tensor var_9345_cast_fp16_0, tensor var_9345_cast_fp16_1 = split(axis = var_9345_axis_0, split_sizes = var_9345_split_sizes_0, x = normed_445_cast_fp16)[name = string("op_9345_cast_fp16")]; tensor const_267_to_fp16 = const()[name = string("const_267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2270993984)))]; tensor var_9348_cast_fp16 = mul(x = var_9345_cast_fp16_0, y = const_267_to_fp16)[name = string("op_9348_cast_fp16")]; tensor hidden_states_137_cast_fp16 = add(x = hidden_states_133_cast_fp16, y = var_9348_cast_fp16)[name = string("hidden_states_137_cast_fp16")]; tensor layers_10_layer_scalar_to_fp16 = const()[name = string("layers_10_layer_scalar_to_fp16"), val = tensor([0x1.c6p-2])]; tensor x_469_cast_fp16 = mul(x = hidden_states_137_cast_fp16, y = layers_10_layer_scalar_to_fp16)[name = string("x_469_cast_fp16")]; int32 var_9356 = const()[name = string("op_9356"), val = int32(-1)]; fp16 const_268_promoted_to_fp16 = const()[name = string("const_268_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9362_cast_fp16 = mul(x = x_469_cast_fp16, y = const_268_promoted_to_fp16)[name = string("op_9362_cast_fp16")]; bool input_395_interleave_0 = const()[name = string("input_395_interleave_0"), val = bool(false)]; tensor input_395_cast_fp16 = concat(axis = var_9356, interleave = input_395_interleave_0, values = (x_469_cast_fp16, var_9362_cast_fp16))[name = string("input_395_cast_fp16")]; tensor normed_449_axes_0 = const()[name = string("normed_449_axes_0"), val = tensor([-1])]; fp16 var_9354_to_fp16 = const()[name = string("op_9354_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_449_cast_fp16 = layer_norm(axes = normed_449_axes_0, epsilon = var_9354_to_fp16, x = input_395_cast_fp16)[name = string("normed_449_cast_fp16")]; tensor var_9367_split_sizes_0 = const()[name = string("op_9367_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_9367_axis_0 = const()[name = string("op_9367_axis_0"), val = int32(-1)]; tensor var_9367_cast_fp16_0, tensor var_9367_cast_fp16_1 = split(axis = var_9367_axis_0, split_sizes = var_9367_split_sizes_0, x = normed_449_cast_fp16)[name = string("op_9367_cast_fp16")]; tensor const_269_to_fp16 = const()[name = string("const_269_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2270997120)))]; tensor var_9370_cast_fp16 = mul(x = var_9367_cast_fp16_0, y = const_269_to_fp16)[name = string("op_9370_cast_fp16")]; tensor var_9378 = const()[name = string("op_9378"), val = tensor([0, 2, 1])]; tensor var_9381_axes_0 = const()[name = string("op_9381_axes_0"), val = tensor([2])]; tensor var_9379_cast_fp16 = transpose(perm = var_9378, x = var_9370_cast_fp16)[name = string("transpose_201")]; tensor var_9381_cast_fp16 = expand_dims(axes = var_9381_axes_0, x = var_9379_cast_fp16)[name = string("op_9381_cast_fp16")]; string var_9397_pad_type_0 = const()[name = string("op_9397_pad_type_0"), val = string("valid")]; tensor var_9397_strides_0 = const()[name = string("op_9397_strides_0"), val = tensor([1, 1])]; tensor var_9397_pad_0 = const()[name = string("op_9397_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9397_dilations_0 = const()[name = string("op_9397_dilations_0"), val = tensor([1, 1])]; int32 var_9397_groups_0 = const()[name = string("op_9397_groups_0"), val = int32(1)]; tensor var_9397 = conv(dilations = var_9397_dilations_0, groups = var_9397_groups_0, pad = var_9397_pad_0, pad_type = var_9397_pad_type_0, strides = var_9397_strides_0, weight = layers_11_self_attn_q_proj_weight_palettized, x = var_9381_cast_fp16)[name = string("op_9397")]; tensor var_9402 = const()[name = string("op_9402"), val = tensor([1, 8, 256, 1])]; tensor var_9403 = reshape(shape = var_9402, x = var_9397)[name = string("op_9403")]; tensor var_9408 = const()[name = string("op_9408"), val = tensor([0, 1, 3, 2])]; tensor var_9418 = const()[name = string("op_9418"), val = tensor([1, 8, 256])]; tensor var_9409 = transpose(perm = var_9408, x = var_9403)[name = string("transpose_200")]; tensor x_473 = reshape(shape = var_9418, x = var_9409)[name = string("x_473")]; int32 var_9424 = const()[name = string("op_9424"), val = int32(-1)]; fp16 const_270_promoted_to_fp16 = const()[name = string("const_270_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9430_cast_fp16 = mul(x = x_473, y = const_270_promoted_to_fp16)[name = string("op_9430_cast_fp16")]; bool input_399_interleave_0 = const()[name = string("input_399_interleave_0"), val = bool(false)]; tensor input_399_cast_fp16 = concat(axis = var_9424, interleave = input_399_interleave_0, values = (x_473, var_9430_cast_fp16))[name = string("input_399_cast_fp16")]; tensor normed_453_axes_0 = const()[name = string("normed_453_axes_0"), val = tensor([-1])]; fp16 var_9422_to_fp16 = const()[name = string("op_9422_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_453_cast_fp16 = layer_norm(axes = normed_453_axes_0, epsilon = var_9422_to_fp16, x = input_399_cast_fp16)[name = string("normed_453_cast_fp16")]; tensor var_9435_split_sizes_0 = const()[name = string("op_9435_split_sizes_0"), val = tensor([256, 256])]; int32 var_9435_axis_0 = const()[name = string("op_9435_axis_0"), val = int32(-1)]; tensor var_9435_cast_fp16_0, tensor var_9435_cast_fp16_1 = split(axis = var_9435_axis_0, split_sizes = var_9435_split_sizes_0, x = normed_453_cast_fp16)[name = string("op_9435_cast_fp16")]; tensor var_9444 = const()[name = string("op_9444"), val = tensor([1, 8, 1, 256])]; tensor q_91 = reshape(shape = var_9444, x = var_9435_cast_fp16_0)[name = string("q_91")]; tensor var_9446 = mul(x = q_91, y = cos_1)[name = string("op_9446")]; tensor var_9447_split_sizes_0 = const()[name = string("op_9447_split_sizes_0"), val = tensor([128, 128])]; int32 var_9447_axis_0 = const()[name = string("op_9447_axis_0"), val = int32(-1)]; tensor var_9447_0, tensor var_9447_1 = split(axis = var_9447_axis_0, split_sizes = var_9447_split_sizes_0, x = q_91)[name = string("op_9447")]; fp16 const_272_promoted = const()[name = string("const_272_promoted"), val = fp16(-0x1p+0)]; tensor var_9449 = mul(x = var_9447_1, y = const_272_promoted)[name = string("op_9449")]; int32 var_9451 = const()[name = string("op_9451"), val = int32(-1)]; bool var_9452_interleave_0 = const()[name = string("op_9452_interleave_0"), val = bool(false)]; tensor var_9452 = concat(axis = var_9451, interleave = var_9452_interleave_0, values = (var_9449, var_9447_0))[name = string("op_9452")]; tensor var_9453 = mul(x = var_9452, y = sin_1)[name = string("op_9453")]; tensor q_95 = add(x = var_9446, y = var_9453)[name = string("q_95")]; string var_9466_pad_type_0 = const()[name = string("op_9466_pad_type_0"), val = string("valid")]; tensor var_9466_strides_0 = const()[name = string("op_9466_strides_0"), val = tensor([1, 1])]; tensor var_9466_pad_0 = const()[name = string("op_9466_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9466_dilations_0 = const()[name = string("op_9466_dilations_0"), val = tensor([1, 1])]; int32 var_9466_groups_0 = const()[name = string("op_9466_groups_0"), val = int32(1)]; tensor var_9466 = conv(dilations = var_9466_dilations_0, groups = var_9466_groups_0, pad = var_9466_pad_0, pad_type = var_9466_pad_type_0, strides = var_9466_strides_0, weight = layers_11_self_attn_k_proj_weight_palettized, x = var_9381_cast_fp16)[name = string("op_9466")]; tensor var_9471 = const()[name = string("op_9471"), val = tensor([1, 1, 256, 1])]; tensor var_9472 = reshape(shape = var_9471, x = var_9466)[name = string("op_9472")]; tensor var_9477 = const()[name = string("op_9477"), val = tensor([0, 1, 3, 2])]; string var_9494_pad_type_0 = const()[name = string("op_9494_pad_type_0"), val = string("valid")]; tensor var_9494_strides_0 = const()[name = string("op_9494_strides_0"), val = tensor([1, 1])]; tensor var_9494_pad_0 = const()[name = string("op_9494_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9494_dilations_0 = const()[name = string("op_9494_dilations_0"), val = tensor([1, 1])]; int32 var_9494_groups_0 = const()[name = string("op_9494_groups_0"), val = int32(1)]; tensor var_9494 = conv(dilations = var_9494_dilations_0, groups = var_9494_groups_0, pad = var_9494_pad_0, pad_type = var_9494_pad_type_0, strides = var_9494_strides_0, weight = layers_11_self_attn_v_proj_weight_palettized, x = var_9381_cast_fp16)[name = string("op_9494")]; tensor var_9499 = const()[name = string("op_9499"), val = tensor([1, 1, 256, 1])]; tensor var_9500 = reshape(shape = var_9499, x = var_9494)[name = string("op_9500")]; tensor var_9505 = const()[name = string("op_9505"), val = tensor([0, 1, 3, 2])]; tensor var_9515 = const()[name = string("op_9515"), val = tensor([1, 1, 256])]; tensor var_9478 = transpose(perm = var_9477, x = var_9472)[name = string("transpose_199")]; tensor x_477 = reshape(shape = var_9515, x = var_9478)[name = string("x_477")]; int32 var_9521 = const()[name = string("op_9521"), val = int32(-1)]; fp16 const_273_promoted_to_fp16 = const()[name = string("const_273_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9527_cast_fp16 = mul(x = x_477, y = const_273_promoted_to_fp16)[name = string("op_9527_cast_fp16")]; bool input_401_interleave_0 = const()[name = string("input_401_interleave_0"), val = bool(false)]; tensor input_401_cast_fp16 = concat(axis = var_9521, interleave = input_401_interleave_0, values = (x_477, var_9527_cast_fp16))[name = string("input_401_cast_fp16")]; tensor normed_457_axes_0 = const()[name = string("normed_457_axes_0"), val = tensor([-1])]; fp16 var_9519_to_fp16 = const()[name = string("op_9519_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_457_cast_fp16 = layer_norm(axes = normed_457_axes_0, epsilon = var_9519_to_fp16, x = input_401_cast_fp16)[name = string("normed_457_cast_fp16")]; tensor var_9532_split_sizes_0 = const()[name = string("op_9532_split_sizes_0"), val = tensor([256, 256])]; int32 var_9532_axis_0 = const()[name = string("op_9532_axis_0"), val = int32(-1)]; tensor var_9532_cast_fp16_0, tensor var_9532_cast_fp16_1 = split(axis = var_9532_axis_0, split_sizes = var_9532_split_sizes_0, x = normed_457_cast_fp16)[name = string("op_9532_cast_fp16")]; tensor const_274_to_fp16 = const()[name = string("const_274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2271000256)))]; tensor var_9535_cast_fp16 = mul(x = var_9532_cast_fp16_0, y = const_274_to_fp16)[name = string("op_9535_cast_fp16")]; tensor var_9541 = const()[name = string("op_9541"), val = tensor([1, 1, 1, 256])]; tensor q_93 = reshape(shape = var_9541, x = var_9535_cast_fp16)[name = string("q_93")]; fp16 var_9548_promoted_to_fp16 = const()[name = string("op_9548_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9506 = transpose(perm = var_9505, x = var_9500)[name = string("transpose_198")]; tensor var_9549_cast_fp16 = pow(x = var_9506, y = var_9548_promoted_to_fp16)[name = string("op_9549_cast_fp16")]; tensor var_9554_axes_0 = const()[name = string("op_9554_axes_0"), val = tensor([-1])]; bool var_9554_keep_dims_0 = const()[name = string("op_9554_keep_dims_0"), val = bool(true)]; tensor var_9554_cast_fp16 = reduce_mean(axes = var_9554_axes_0, keep_dims = var_9554_keep_dims_0, x = var_9549_cast_fp16)[name = string("op_9554_cast_fp16")]; fp16 var_9556_to_fp16 = const()[name = string("op_9556_to_fp16"), val = fp16(0x1.1p-20)]; tensor mean_sq_23_cast_fp16 = add(x = var_9554_cast_fp16, y = var_9556_to_fp16)[name = string("mean_sq_23_cast_fp16")]; fp16 var_9563_to_fp16 = const()[name = string("op_9563_to_fp16"), val = fp16(-0x1p-1)]; tensor var_9564_cast_fp16 = pow(x = mean_sq_23_cast_fp16, y = var_9563_to_fp16)[name = string("op_9564_cast_fp16")]; tensor var_9565_cast_fp16 = mul(x = var_9506, y = var_9564_cast_fp16)[name = string("op_9565_cast_fp16")]; tensor var_9571 = mul(x = q_93, y = cos_1)[name = string("op_9571")]; tensor var_9572_split_sizes_0 = const()[name = string("op_9572_split_sizes_0"), val = tensor([128, 128])]; int32 var_9572_axis_0 = const()[name = string("op_9572_axis_0"), val = int32(-1)]; tensor var_9572_0, tensor var_9572_1 = split(axis = var_9572_axis_0, split_sizes = var_9572_split_sizes_0, x = q_93)[name = string("op_9572")]; fp16 const_275_promoted = const()[name = string("const_275_promoted"), val = fp16(-0x1p+0)]; tensor var_9574 = mul(x = var_9572_1, y = const_275_promoted)[name = string("op_9574")]; int32 var_9576 = const()[name = string("op_9576"), val = int32(-1)]; bool var_9577_interleave_0 = const()[name = string("op_9577_interleave_0"), val = bool(false)]; tensor var_9577 = concat(axis = var_9576, interleave = var_9577_interleave_0, values = (var_9574, var_9572_0))[name = string("op_9577")]; tensor var_9578 = mul(x = var_9577, y = sin_1)[name = string("op_9578")]; tensor input_403 = add(x = var_9571, y = var_9578)[name = string("input_403")]; tensor var_9583_begin_0 = const()[name = string("op_9583_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_9583_end_0 = const()[name = string("op_9583_end_0"), val = tensor([12, 1, 512, 512])]; tensor var_9583_end_mask_0 = const()[name = string("op_9583_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9583_squeeze_mask_0 = const()[name = string("op_9583_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_9583_cast_fp16 = slice_by_index(begin = var_9583_begin_0, end = var_9583_end_0, end_mask = var_9583_end_mask_0, squeeze_mask = var_9583_squeeze_mask_0, x = coreml_update_state_51)[name = string("op_9583_cast_fp16")]; tensor K_cache_23_axes_0 = const()[name = string("K_cache_23_axes_0"), val = tensor([0])]; tensor K_cache_23_cast_fp16 = expand_dims(axes = K_cache_23_axes_0, x = var_9583_cast_fp16)[name = string("K_cache_23_cast_fp16")]; tensor var_9588_begin_0 = const()[name = string("op_9588_begin_0"), val = tensor([46, 0, 0, 0])]; tensor var_9588_end_0 = const()[name = string("op_9588_end_0"), val = tensor([47, 1, 512, 512])]; tensor var_9588_end_mask_0 = const()[name = string("op_9588_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_9588_squeeze_mask_0 = const()[name = string("op_9588_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_9588_cast_fp16 = slice_by_index(begin = var_9588_begin_0, end = var_9588_end_0, end_mask = var_9588_end_mask_0, squeeze_mask = var_9588_squeeze_mask_0, x = coreml_update_state_51)[name = string("op_9588_cast_fp16")]; tensor V_cache_23_axes_0 = const()[name = string("V_cache_23_axes_0"), val = tensor([0])]; tensor V_cache_23_cast_fp16 = expand_dims(axes = V_cache_23_axes_0, x = var_9588_cast_fp16)[name = string("V_cache_23_cast_fp16")]; tensor k_padded_19_pad_0 = const()[name = string("k_padded_19_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string k_padded_19_mode_0 = const()[name = string("k_padded_19_mode_0"), val = string("constant")]; fp16 const_276_to_fp16 = const()[name = string("const_276_to_fp16"), val = fp16(0x0p+0)]; tensor k_padded_19_cast_fp16 = pad(constant_val = const_276_to_fp16, mode = k_padded_19_mode_0, pad = k_padded_19_pad_0, x = input_403)[name = string("k_padded_19_cast_fp16")]; tensor v_padded_19_pad_0 = const()[name = string("v_padded_19_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string v_padded_19_mode_0 = const()[name = string("v_padded_19_mode_0"), val = string("constant")]; fp16 const_277_to_fp16 = const()[name = string("const_277_to_fp16"), val = fp16(0x0p+0)]; tensor v_padded_19_cast_fp16 = pad(constant_val = const_277_to_fp16, mode = v_padded_19_mode_0, pad = v_padded_19_pad_0, x = var_9565_cast_fp16)[name = string("v_padded_19_cast_fp16")]; tensor var_9606_cast_fp16 = mul(x = K_cache_23_cast_fp16, y = var_3515_cast_fp16)[name = string("op_9606_cast_fp16")]; tensor var_9607_reps_0 = const()[name = string("op_9607_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_9607_cast_fp16 = tile(reps = var_9607_reps_0, x = k_padded_19_cast_fp16)[name = string("op_9607_cast_fp16")]; tensor var_9608_cast_fp16 = mul(x = var_9607_cast_fp16, y = update_mask)[name = string("op_9608_cast_fp16")]; tensor K_new_23_cast_fp16 = add(x = var_9606_cast_fp16, y = var_9608_cast_fp16)[name = string("K_new_23_cast_fp16")]; tensor var_9614_cast_fp16 = mul(x = V_cache_23_cast_fp16, y = var_3515_cast_fp16)[name = string("op_9614_cast_fp16")]; tensor var_9615_reps_0 = const()[name = string("op_9615_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_9615_cast_fp16 = tile(reps = var_9615_reps_0, x = v_padded_19_cast_fp16)[name = string("op_9615_cast_fp16")]; tensor var_9616_cast_fp16 = mul(x = var_9615_cast_fp16, y = update_mask)[name = string("op_9616_cast_fp16")]; tensor V_new_23_cast_fp16 = add(x = var_9614_cast_fp16, y = var_9616_cast_fp16)[name = string("V_new_23_cast_fp16")]; tensor var_9620_axes_0 = const()[name = string("op_9620_axes_0"), val = tensor([0])]; tensor var_9620_cast_fp16 = squeeze(axes = var_9620_axes_0, x = K_new_23_cast_fp16)[name = string("op_9620_cast_fp16")]; tensor concat_88 = const()[name = string("concat_88"), val = tensor([11, 0, 0, 0])]; tensor concat_89 = const()[name = string("concat_89"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_23_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_23_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_23_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_88, begin_mask = kv_cache_0_internal_tensor_assign_23_begin_mask_0, end = concat_89, end_mask = kv_cache_0_internal_tensor_assign_23_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_23_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_23_stride_0, update = var_9620_cast_fp16, x = coreml_update_state_51)[name = string("kv_cache_0_internal_tensor_assign_23_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_23_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_52_write_state")]; tensor coreml_update_state_52 = read_state(input = kv_cache_0)[name = string("coreml_update_state_52")]; tensor var_9627_axes_0 = const()[name = string("op_9627_axes_0"), val = tensor([0])]; tensor var_9627_cast_fp16 = squeeze(axes = var_9627_axes_0, x = V_new_23_cast_fp16)[name = string("op_9627_cast_fp16")]; tensor concat_90 = const()[name = string("concat_90"), val = tensor([46, 0, 0, 0])]; tensor concat_91 = const()[name = string("concat_91"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_24_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_24_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_24_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_90, begin_mask = kv_cache_0_internal_tensor_assign_24_begin_mask_0, end = concat_91, end_mask = kv_cache_0_internal_tensor_assign_24_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_24_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_24_stride_0, update = var_9627_cast_fp16, x = coreml_update_state_52)[name = string("kv_cache_0_internal_tensor_assign_24_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_24_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_53_write_state")]; tensor coreml_update_state_53 = read_state(input = kv_cache_0)[name = string("coreml_update_state_53")]; tensor K_for_attn_23_begin_0 = const()[name = string("K_for_attn_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor K_for_attn_23_end_0 = const()[name = string("K_for_attn_23_end_0"), val = tensor([1, 1, 512, 256])]; tensor K_for_attn_23_end_mask_0 = const()[name = string("K_for_attn_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor K_for_attn_23_cast_fp16 = slice_by_index(begin = K_for_attn_23_begin_0, end = K_for_attn_23_end_0, end_mask = K_for_attn_23_end_mask_0, x = K_new_23_cast_fp16)[name = string("K_for_attn_23_cast_fp16")]; tensor V_for_attn_23_begin_0 = const()[name = string("V_for_attn_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor V_for_attn_23_end_0 = const()[name = string("V_for_attn_23_end_0"), val = tensor([1, 1, 512, 256])]; tensor V_for_attn_23_end_mask_0 = const()[name = string("V_for_attn_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor V_for_attn_23_cast_fp16 = slice_by_index(begin = V_for_attn_23_begin_0, end = V_for_attn_23_end_0, end_mask = V_for_attn_23_end_mask_0, x = V_new_23_cast_fp16)[name = string("V_for_attn_23_cast_fp16")]; tensor transpose_44_perm_0 = const()[name = string("transpose_44_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_22_reps_0 = const()[name = string("tile_22_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_44_cast_fp16 = transpose(perm = transpose_44_perm_0, x = K_for_attn_23_cast_fp16)[name = string("transpose_197")]; tensor tile_22_cast_fp16 = tile(reps = tile_22_reps_0, x = transpose_44_cast_fp16)[name = string("tile_22_cast_fp16")]; tensor concat_92 = const()[name = string("concat_92"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_44_cast_fp16 = reshape(shape = concat_92, x = tile_22_cast_fp16)[name = string("reshape_44_cast_fp16")]; tensor transpose_45_perm_0 = const()[name = string("transpose_45_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_93 = const()[name = string("concat_93"), val = tensor([-1, 1, 512, 256])]; tensor transpose_45_cast_fp16 = transpose(perm = transpose_45_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_196")]; tensor reshape_45_cast_fp16 = reshape(shape = concat_93, x = transpose_45_cast_fp16)[name = string("reshape_45_cast_fp16")]; tensor transpose_151_perm_0 = const()[name = string("transpose_151_perm_0"), val = tensor([1, 0, -1, -2])]; tensor transpose_46_perm_0 = const()[name = string("transpose_46_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_23_reps_0 = const()[name = string("tile_23_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_46_cast_fp16 = transpose(perm = transpose_46_perm_0, x = V_for_attn_23_cast_fp16)[name = string("transpose_195")]; tensor tile_23_cast_fp16 = tile(reps = tile_23_reps_0, x = transpose_46_cast_fp16)[name = string("tile_23_cast_fp16")]; tensor concat_94 = const()[name = string("concat_94"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_46_cast_fp16 = reshape(shape = concat_94, x = tile_23_cast_fp16)[name = string("reshape_46_cast_fp16")]; tensor transpose_47_perm_0 = const()[name = string("transpose_47_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_95 = const()[name = string("concat_95"), val = tensor([-1, 1, 512, 256])]; tensor transpose_47_cast_fp16 = transpose(perm = transpose_47_perm_0, x = reshape_46_cast_fp16)[name = string("transpose_194")]; tensor reshape_47_cast_fp16 = reshape(shape = concat_95, x = transpose_47_cast_fp16)[name = string("reshape_47_cast_fp16")]; tensor V_expanded_23_perm_0 = const()[name = string("V_expanded_23_perm_0"), val = tensor([1, 0, -2, -1])]; bool var_9664_transpose_x_0 = const()[name = string("op_9664_transpose_x_0"), val = bool(false)]; bool var_9664_transpose_y_0 = const()[name = string("op_9664_transpose_y_0"), val = bool(false)]; tensor transpose_151_cast_fp16 = transpose(perm = transpose_151_perm_0, x = reshape_45_cast_fp16)[name = string("transpose_193")]; tensor var_9664_cast_fp16 = matmul(transpose_x = var_9664_transpose_x_0, transpose_y = var_9664_transpose_y_0, x = q_95, y = transpose_151_cast_fp16)[name = string("op_9664_cast_fp16")]; tensor attn_weights_69_cast_fp16 = add(x = var_9664_cast_fp16, y = causal_mask)[name = string("attn_weights_69_cast_fp16")]; int32 var_9674 = const()[name = string("op_9674"), val = int32(-1)]; tensor var_9676_cast_fp16 = softmax(axis = var_9674, x = attn_weights_69_cast_fp16)[name = string("op_9676_cast_fp16")]; bool var_9692_transpose_x_0 = const()[name = string("op_9692_transpose_x_0"), val = bool(false)]; bool var_9692_transpose_y_0 = const()[name = string("op_9692_transpose_y_0"), val = bool(false)]; tensor V_expanded_23_cast_fp16 = transpose(perm = V_expanded_23_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_192")]; tensor var_9692_cast_fp16 = matmul(transpose_x = var_9692_transpose_x_0, transpose_y = var_9692_transpose_y_0, x = var_9676_cast_fp16, y = V_expanded_23_cast_fp16)[name = string("op_9692_cast_fp16")]; tensor var_9702 = const()[name = string("op_9702"), val = tensor([0, 2, 1, 3])]; tensor var_9709 = const()[name = string("op_9709"), val = tensor([1, 1, -1])]; tensor var_9703 = transpose(perm = var_9702, x = var_9692_cast_fp16)[name = string("transpose_191")]; tensor attn_output_69 = reshape(shape = var_9709, x = var_9703)[name = string("attn_output_69")]; tensor var_9714 = const()[name = string("op_9714"), val = tensor([0, 2, 1])]; tensor squeeze_11_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2271000832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2272573760))))[name = string("squeeze_11_palettized")]; string var_9730_pad_type_0 = const()[name = string("op_9730_pad_type_0"), val = string("valid")]; int32 var_9730_groups_0 = const()[name = string("op_9730_groups_0"), val = int32(1)]; tensor var_9730_strides_0 = const()[name = string("op_9730_strides_0"), val = tensor([1])]; tensor var_9730_pad_0 = const()[name = string("op_9730_pad_0"), val = tensor([0, 0])]; tensor var_9730_dilations_0 = const()[name = string("op_9730_dilations_0"), val = tensor([1])]; tensor var_9715 = transpose(perm = var_9714, x = attn_output_69)[name = string("transpose_190")]; tensor var_9730 = conv(dilations = var_9730_dilations_0, groups = var_9730_groups_0, pad = var_9730_pad_0, pad_type = var_9730_pad_type_0, strides = var_9730_strides_0, weight = squeeze_11_palettized, x = var_9715)[name = string("op_9730")]; tensor var_9734 = const()[name = string("op_9734"), val = tensor([0, 2, 1])]; int32 var_9740 = const()[name = string("op_9740"), val = int32(-1)]; fp16 const_278_promoted_to_fp16 = const()[name = string("const_278_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_483 = transpose(perm = var_9734, x = var_9730)[name = string("transpose_189")]; tensor var_9746_cast_fp16 = mul(x = x_483, y = const_278_promoted_to_fp16)[name = string("op_9746_cast_fp16")]; bool input_409_interleave_0 = const()[name = string("input_409_interleave_0"), val = bool(false)]; tensor input_409_cast_fp16 = concat(axis = var_9740, interleave = input_409_interleave_0, values = (x_483, var_9746_cast_fp16))[name = string("input_409_cast_fp16")]; tensor normed_461_axes_0 = const()[name = string("normed_461_axes_0"), val = tensor([-1])]; fp16 var_9738_to_fp16 = const()[name = string("op_9738_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_461_cast_fp16 = layer_norm(axes = normed_461_axes_0, epsilon = var_9738_to_fp16, x = input_409_cast_fp16)[name = string("normed_461_cast_fp16")]; tensor var_9751_split_sizes_0 = const()[name = string("op_9751_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_9751_axis_0 = const()[name = string("op_9751_axis_0"), val = int32(-1)]; tensor var_9751_cast_fp16_0, tensor var_9751_cast_fp16_1 = split(axis = var_9751_axis_0, split_sizes = var_9751_split_sizes_0, x = normed_461_cast_fp16)[name = string("op_9751_cast_fp16")]; tensor const_279_to_fp16 = const()[name = string("const_279_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2272575360)))]; tensor var_9754_cast_fp16 = mul(x = var_9751_cast_fp16_0, y = const_279_to_fp16)[name = string("op_9754_cast_fp16")]; tensor x_487_cast_fp16 = add(x = x_469_cast_fp16, y = var_9754_cast_fp16)[name = string("x_487_cast_fp16")]; int32 var_9761 = const()[name = string("op_9761"), val = int32(-1)]; fp16 const_280_promoted_to_fp16 = const()[name = string("const_280_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9767_cast_fp16 = mul(x = x_487_cast_fp16, y = const_280_promoted_to_fp16)[name = string("op_9767_cast_fp16")]; bool input_411_interleave_0 = const()[name = string("input_411_interleave_0"), val = bool(false)]; tensor input_411_cast_fp16 = concat(axis = var_9761, interleave = input_411_interleave_0, values = (x_487_cast_fp16, var_9767_cast_fp16))[name = string("input_411_cast_fp16")]; tensor normed_465_axes_0 = const()[name = string("normed_465_axes_0"), val = tensor([-1])]; fp16 var_9759_to_fp16 = const()[name = string("op_9759_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_465_cast_fp16 = layer_norm(axes = normed_465_axes_0, epsilon = var_9759_to_fp16, x = input_411_cast_fp16)[name = string("normed_465_cast_fp16")]; tensor var_9772_split_sizes_0 = const()[name = string("op_9772_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_9772_axis_0 = const()[name = string("op_9772_axis_0"), val = int32(-1)]; tensor var_9772_cast_fp16_0, tensor var_9772_cast_fp16_1 = split(axis = var_9772_axis_0, split_sizes = var_9772_split_sizes_0, x = normed_465_cast_fp16)[name = string("op_9772_cast_fp16")]; tensor const_281_to_fp16 = const()[name = string("const_281_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2272578496)))]; tensor var_9775_cast_fp16 = mul(x = var_9772_cast_fp16_0, y = const_281_to_fp16)[name = string("op_9775_cast_fp16")]; tensor var_9788 = const()[name = string("op_9788"), val = tensor([0, 2, 1])]; tensor input_413_axes_0 = const()[name = string("input_413_axes_0"), val = tensor([2])]; tensor var_9789 = transpose(perm = var_9788, x = var_9775_cast_fp16)[name = string("transpose_188")]; tensor input_413 = expand_dims(axes = input_413_axes_0, x = var_9789)[name = string("input_413")]; string gate_45_pad_type_0 = const()[name = string("gate_45_pad_type_0"), val = string("valid")]; tensor gate_45_strides_0 = const()[name = string("gate_45_strides_0"), val = tensor([1, 1])]; tensor gate_45_pad_0 = const()[name = string("gate_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_45_dilations_0 = const()[name = string("gate_45_dilations_0"), val = tensor([1, 1])]; int32 gate_45_groups_0 = const()[name = string("gate_45_groups_0"), val = int32(1)]; tensor gate_45 = conv(dilations = gate_45_dilations_0, groups = gate_45_groups_0, pad = gate_45_pad_0, pad_type = gate_45_pad_type_0, strides = gate_45_strides_0, weight = layers_11_mlp_gate_proj_weight_palettized, x = input_413)[name = string("gate_45")]; string up_23_pad_type_0 = const()[name = string("up_23_pad_type_0"), val = string("valid")]; tensor up_23_strides_0 = const()[name = string("up_23_strides_0"), val = tensor([1, 1])]; tensor up_23_pad_0 = const()[name = string("up_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_23_dilations_0 = const()[name = string("up_23_dilations_0"), val = tensor([1, 1])]; int32 up_23_groups_0 = const()[name = string("up_23_groups_0"), val = int32(1)]; tensor up_23 = conv(dilations = up_23_dilations_0, groups = up_23_groups_0, pad = up_23_pad_0, pad_type = up_23_pad_type_0, strides = up_23_strides_0, weight = layers_11_mlp_up_proj_weight_palettized, x = input_413)[name = string("up_23")]; string gate_47_mode_0 = const()[name = string("gate_47_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_47 = gelu(mode = gate_47_mode_0, x = gate_45)[name = string("gate_47")]; tensor input_415 = mul(x = gate_47, y = up_23)[name = string("input_415")]; string mlp_out_23_pad_type_0 = const()[name = string("mlp_out_23_pad_type_0"), val = string("valid")]; tensor mlp_out_23_strides_0 = const()[name = string("mlp_out_23_strides_0"), val = tensor([1, 1])]; tensor mlp_out_23_pad_0 = const()[name = string("mlp_out_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_23_dilations_0 = const()[name = string("mlp_out_23_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_23_groups_0 = const()[name = string("mlp_out_23_groups_0"), val = int32(1)]; tensor mlp_out_23 = conv(dilations = mlp_out_23_dilations_0, groups = mlp_out_23_groups_0, pad = mlp_out_23_pad_0, pad_type = mlp_out_23_pad_type_0, strides = mlp_out_23_strides_0, weight = layers_11_mlp_down_proj_weight_palettized, x = input_415)[name = string("mlp_out_23")]; tensor var_9829_axes_0 = const()[name = string("op_9829_axes_0"), val = tensor([2])]; tensor var_9829 = squeeze(axes = var_9829_axes_0, x = mlp_out_23)[name = string("op_9829")]; tensor var_9833 = const()[name = string("op_9833"), val = tensor([0, 2, 1])]; int32 var_9839 = const()[name = string("op_9839"), val = int32(-1)]; fp16 const_282_promoted_to_fp16 = const()[name = string("const_282_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_491 = transpose(perm = var_9833, x = var_9829)[name = string("transpose_187")]; tensor var_9845_cast_fp16 = mul(x = x_491, y = const_282_promoted_to_fp16)[name = string("op_9845_cast_fp16")]; bool input_417_interleave_0 = const()[name = string("input_417_interleave_0"), val = bool(false)]; tensor input_417_cast_fp16 = concat(axis = var_9839, interleave = input_417_interleave_0, values = (x_491, var_9845_cast_fp16))[name = string("input_417_cast_fp16")]; tensor normed_469_axes_0 = const()[name = string("normed_469_axes_0"), val = tensor([-1])]; fp16 var_9837_to_fp16 = const()[name = string("op_9837_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_469_cast_fp16 = layer_norm(axes = normed_469_axes_0, epsilon = var_9837_to_fp16, x = input_417_cast_fp16)[name = string("normed_469_cast_fp16")]; tensor var_9850_split_sizes_0 = const()[name = string("op_9850_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_9850_axis_0 = const()[name = string("op_9850_axis_0"), val = int32(-1)]; tensor var_9850_cast_fp16_0, tensor var_9850_cast_fp16_1 = split(axis = var_9850_axis_0, split_sizes = var_9850_split_sizes_0, x = normed_469_cast_fp16)[name = string("op_9850_cast_fp16")]; tensor const_283_to_fp16 = const()[name = string("const_283_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2272581632)))]; tensor var_9853_cast_fp16 = mul(x = var_9850_cast_fp16_0, y = const_283_to_fp16)[name = string("op_9853_cast_fp16")]; tensor hidden_states_145_cast_fp16 = add(x = x_487_cast_fp16, y = var_9853_cast_fp16)[name = string("hidden_states_145_cast_fp16")]; tensor per_layer_slice_23_begin_0 = const()[name = string("per_layer_slice_23_begin_0"), val = tensor([0, 0, 2816])]; tensor per_layer_slice_23_end_0 = const()[name = string("per_layer_slice_23_end_0"), val = tensor([1, 1, 3072])]; tensor per_layer_slice_23_end_mask_0 = const()[name = string("per_layer_slice_23_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_23 = slice_by_index(begin = per_layer_slice_23_begin_0, end = per_layer_slice_23_end_0, end_mask = per_layer_slice_23_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_23")]; tensor gated_45 = linear(bias = linear_1_bias_0, weight = layers_11_per_layer_input_gate_weight_palettized, x = hidden_states_145_cast_fp16)[name = string("linear_23")]; string gated_47_mode_0 = const()[name = string("gated_47_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_47 = gelu(mode = gated_47_mode_0, x = gated_45)[name = string("gated_47")]; tensor input_421 = mul(x = gated_47, y = per_layer_slice_23)[name = string("input_421")]; tensor x_495 = linear(bias = linear_2_bias_0, weight = layers_11_per_layer_projection_weight_palettized, x = input_421)[name = string("linear_24")]; int32 var_9890 = const()[name = string("op_9890"), val = int32(-1)]; fp16 const_284_promoted_to_fp16 = const()[name = string("const_284_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9896_cast_fp16 = mul(x = x_495, y = const_284_promoted_to_fp16)[name = string("op_9896_cast_fp16")]; bool input_423_interleave_0 = const()[name = string("input_423_interleave_0"), val = bool(false)]; tensor input_423_cast_fp16 = concat(axis = var_9890, interleave = input_423_interleave_0, values = (x_495, var_9896_cast_fp16))[name = string("input_423_cast_fp16")]; tensor normed_473_axes_0 = const()[name = string("normed_473_axes_0"), val = tensor([-1])]; fp16 var_9888_to_fp16 = const()[name = string("op_9888_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_473_cast_fp16 = layer_norm(axes = normed_473_axes_0, epsilon = var_9888_to_fp16, x = input_423_cast_fp16)[name = string("normed_473_cast_fp16")]; tensor var_9901_split_sizes_0 = const()[name = string("op_9901_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_9901_axis_0 = const()[name = string("op_9901_axis_0"), val = int32(-1)]; tensor var_9901_cast_fp16_0, tensor var_9901_cast_fp16_1 = split(axis = var_9901_axis_0, split_sizes = var_9901_split_sizes_0, x = normed_473_cast_fp16)[name = string("op_9901_cast_fp16")]; tensor const_285_to_fp16 = const()[name = string("const_285_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2272584768)))]; tensor var_9904_cast_fp16 = mul(x = var_9901_cast_fp16_0, y = const_285_to_fp16)[name = string("op_9904_cast_fp16")]; tensor hidden_states_149_cast_fp16 = add(x = hidden_states_145_cast_fp16, y = var_9904_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; tensor layers_11_layer_scalar_to_fp16 = const()[name = string("layers_11_layer_scalar_to_fp16"), val = tensor([0x1.7ap-2])]; tensor x_499_cast_fp16 = mul(x = hidden_states_149_cast_fp16, y = layers_11_layer_scalar_to_fp16)[name = string("x_499_cast_fp16")]; int32 var_9912 = const()[name = string("op_9912"), val = int32(-1)]; fp16 const_286_promoted_to_fp16 = const()[name = string("const_286_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9918_cast_fp16 = mul(x = x_499_cast_fp16, y = const_286_promoted_to_fp16)[name = string("op_9918_cast_fp16")]; bool input_425_interleave_0 = const()[name = string("input_425_interleave_0"), val = bool(false)]; tensor input_425_cast_fp16 = concat(axis = var_9912, interleave = input_425_interleave_0, values = (x_499_cast_fp16, var_9918_cast_fp16))[name = string("input_425_cast_fp16")]; tensor normed_477_axes_0 = const()[name = string("normed_477_axes_0"), val = tensor([-1])]; fp16 var_9910_to_fp16 = const()[name = string("op_9910_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_477_cast_fp16 = layer_norm(axes = normed_477_axes_0, epsilon = var_9910_to_fp16, x = input_425_cast_fp16)[name = string("normed_477_cast_fp16")]; tensor var_9923_split_sizes_0 = const()[name = string("op_9923_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_9923_axis_0 = const()[name = string("op_9923_axis_0"), val = int32(-1)]; tensor var_9923_cast_fp16_0, tensor var_9923_cast_fp16_1 = split(axis = var_9923_axis_0, split_sizes = var_9923_split_sizes_0, x = normed_477_cast_fp16)[name = string("op_9923_cast_fp16")]; tensor const_287_to_fp16 = const()[name = string("const_287_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2272587904)))]; tensor var_9926_cast_fp16 = mul(x = var_9923_cast_fp16_0, y = const_287_to_fp16)[name = string("op_9926_cast_fp16")]; tensor var_9934 = const()[name = string("op_9934"), val = tensor([0, 2, 1])]; tensor var_9937_axes_0 = const()[name = string("op_9937_axes_0"), val = tensor([2])]; tensor var_9935_cast_fp16 = transpose(perm = var_9934, x = var_9926_cast_fp16)[name = string("transpose_186")]; tensor var_9937_cast_fp16 = expand_dims(axes = var_9937_axes_0, x = var_9935_cast_fp16)[name = string("op_9937_cast_fp16")]; string var_9953_pad_type_0 = const()[name = string("op_9953_pad_type_0"), val = string("valid")]; tensor var_9953_strides_0 = const()[name = string("op_9953_strides_0"), val = tensor([1, 1])]; tensor var_9953_pad_0 = const()[name = string("op_9953_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9953_dilations_0 = const()[name = string("op_9953_dilations_0"), val = tensor([1, 1])]; int32 var_9953_groups_0 = const()[name = string("op_9953_groups_0"), val = int32(1)]; tensor var_9953 = conv(dilations = var_9953_dilations_0, groups = var_9953_groups_0, pad = var_9953_pad_0, pad_type = var_9953_pad_type_0, strides = var_9953_strides_0, weight = layers_12_self_attn_q_proj_weight_palettized, x = var_9937_cast_fp16)[name = string("op_9953")]; tensor var_9958 = const()[name = string("op_9958"), val = tensor([1, 8, 256, 1])]; tensor var_9959 = reshape(shape = var_9958, x = var_9953)[name = string("op_9959")]; tensor var_9964 = const()[name = string("op_9964"), val = tensor([0, 1, 3, 2])]; tensor var_9974 = const()[name = string("op_9974"), val = tensor([1, 8, 256])]; tensor var_9965 = transpose(perm = var_9964, x = var_9959)[name = string("transpose_185")]; tensor x_503 = reshape(shape = var_9974, x = var_9965)[name = string("x_503")]; int32 var_9980 = const()[name = string("op_9980"), val = int32(-1)]; fp16 const_288_promoted_to_fp16 = const()[name = string("const_288_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9986_cast_fp16 = mul(x = x_503, y = const_288_promoted_to_fp16)[name = string("op_9986_cast_fp16")]; bool input_429_interleave_0 = const()[name = string("input_429_interleave_0"), val = bool(false)]; tensor input_429_cast_fp16 = concat(axis = var_9980, interleave = input_429_interleave_0, values = (x_503, var_9986_cast_fp16))[name = string("input_429_cast_fp16")]; tensor normed_481_axes_0 = const()[name = string("normed_481_axes_0"), val = tensor([-1])]; fp16 var_9978_to_fp16 = const()[name = string("op_9978_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_481_cast_fp16 = layer_norm(axes = normed_481_axes_0, epsilon = var_9978_to_fp16, x = input_429_cast_fp16)[name = string("normed_481_cast_fp16")]; tensor var_9991_split_sizes_0 = const()[name = string("op_9991_split_sizes_0"), val = tensor([256, 256])]; int32 var_9991_axis_0 = const()[name = string("op_9991_axis_0"), val = int32(-1)]; tensor var_9991_cast_fp16_0, tensor var_9991_cast_fp16_1 = split(axis = var_9991_axis_0, split_sizes = var_9991_split_sizes_0, x = normed_481_cast_fp16)[name = string("op_9991_cast_fp16")]; tensor var_9994_cast_fp16 = mul(x = var_9991_cast_fp16_0, y = const_95_to_fp16)[name = string("op_9994_cast_fp16")]; tensor var_10000 = const()[name = string("op_10000"), val = tensor([1, 8, 1, 256])]; tensor q_99 = reshape(shape = var_10000, x = var_9994_cast_fp16)[name = string("q_99")]; tensor var_10002 = mul(x = q_99, y = cos_1)[name = string("op_10002")]; tensor var_10003_split_sizes_0 = const()[name = string("op_10003_split_sizes_0"), val = tensor([128, 128])]; int32 var_10003_axis_0 = const()[name = string("op_10003_axis_0"), val = int32(-1)]; tensor var_10003_0, tensor var_10003_1 = split(axis = var_10003_axis_0, split_sizes = var_10003_split_sizes_0, x = q_99)[name = string("op_10003")]; fp16 const_290_promoted = const()[name = string("const_290_promoted"), val = fp16(-0x1p+0)]; tensor var_10005 = mul(x = var_10003_1, y = const_290_promoted)[name = string("op_10005")]; int32 var_10007 = const()[name = string("op_10007"), val = int32(-1)]; bool var_10008_interleave_0 = const()[name = string("op_10008_interleave_0"), val = bool(false)]; tensor var_10008 = concat(axis = var_10007, interleave = var_10008_interleave_0, values = (var_10005, var_10003_0))[name = string("op_10008")]; tensor var_10009 = mul(x = var_10008, y = sin_1)[name = string("op_10009")]; tensor q_103 = add(x = var_10002, y = var_10009)[name = string("q_103")]; string var_10022_pad_type_0 = const()[name = string("op_10022_pad_type_0"), val = string("valid")]; tensor var_10022_strides_0 = const()[name = string("op_10022_strides_0"), val = tensor([1, 1])]; tensor var_10022_pad_0 = const()[name = string("op_10022_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_10022_dilations_0 = const()[name = string("op_10022_dilations_0"), val = tensor([1, 1])]; int32 var_10022_groups_0 = const()[name = string("op_10022_groups_0"), val = int32(1)]; tensor var_10022 = conv(dilations = var_10022_dilations_0, groups = var_10022_groups_0, pad = var_10022_pad_0, pad_type = var_10022_pad_type_0, strides = var_10022_strides_0, weight = layers_12_self_attn_k_proj_weight_palettized, x = var_9937_cast_fp16)[name = string("op_10022")]; tensor var_10027 = const()[name = string("op_10027"), val = tensor([1, 1, 256, 1])]; tensor var_10028 = reshape(shape = var_10027, x = var_10022)[name = string("op_10028")]; tensor var_10033 = const()[name = string("op_10033"), val = tensor([0, 1, 3, 2])]; string var_10050_pad_type_0 = const()[name = string("op_10050_pad_type_0"), val = string("valid")]; tensor var_10050_strides_0 = const()[name = string("op_10050_strides_0"), val = tensor([1, 1])]; tensor var_10050_pad_0 = const()[name = string("op_10050_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_10050_dilations_0 = const()[name = string("op_10050_dilations_0"), val = tensor([1, 1])]; int32 var_10050_groups_0 = const()[name = string("op_10050_groups_0"), val = int32(1)]; tensor var_10050 = conv(dilations = var_10050_dilations_0, groups = var_10050_groups_0, pad = var_10050_pad_0, pad_type = var_10050_pad_type_0, strides = var_10050_strides_0, weight = layers_12_self_attn_v_proj_weight_palettized, x = var_9937_cast_fp16)[name = string("op_10050")]; tensor var_10055 = const()[name = string("op_10055"), val = tensor([1, 1, 256, 1])]; tensor var_10056 = reshape(shape = var_10055, x = var_10050)[name = string("op_10056")]; tensor var_10061 = const()[name = string("op_10061"), val = tensor([0, 1, 3, 2])]; tensor var_10071 = const()[name = string("op_10071"), val = tensor([1, 1, 256])]; tensor var_10034 = transpose(perm = var_10033, x = var_10028)[name = string("transpose_184")]; tensor x_507 = reshape(shape = var_10071, x = var_10034)[name = string("x_507")]; int32 var_10077 = const()[name = string("op_10077"), val = int32(-1)]; fp16 const_291_promoted_to_fp16 = const()[name = string("const_291_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10083_cast_fp16 = mul(x = x_507, y = const_291_promoted_to_fp16)[name = string("op_10083_cast_fp16")]; bool input_431_interleave_0 = const()[name = string("input_431_interleave_0"), val = bool(false)]; tensor input_431_cast_fp16 = concat(axis = var_10077, interleave = input_431_interleave_0, values = (x_507, var_10083_cast_fp16))[name = string("input_431_cast_fp16")]; tensor normed_485_axes_0 = const()[name = string("normed_485_axes_0"), val = tensor([-1])]; fp16 var_10075_to_fp16 = const()[name = string("op_10075_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_485_cast_fp16 = layer_norm(axes = normed_485_axes_0, epsilon = var_10075_to_fp16, x = input_431_cast_fp16)[name = string("normed_485_cast_fp16")]; tensor var_10088_split_sizes_0 = const()[name = string("op_10088_split_sizes_0"), val = tensor([256, 256])]; int32 var_10088_axis_0 = const()[name = string("op_10088_axis_0"), val = int32(-1)]; tensor var_10088_cast_fp16_0, tensor var_10088_cast_fp16_1 = split(axis = var_10088_axis_0, split_sizes = var_10088_split_sizes_0, x = normed_485_cast_fp16)[name = string("op_10088_cast_fp16")]; tensor const_292_to_fp16 = const()[name = string("const_292_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2272591040)))]; tensor var_10091_cast_fp16 = mul(x = var_10088_cast_fp16_0, y = const_292_to_fp16)[name = string("op_10091_cast_fp16")]; tensor var_10097 = const()[name = string("op_10097"), val = tensor([1, 1, 1, 256])]; tensor q_101 = reshape(shape = var_10097, x = var_10091_cast_fp16)[name = string("q_101")]; fp16 var_10104_promoted_to_fp16 = const()[name = string("op_10104_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10062 = transpose(perm = var_10061, x = var_10056)[name = string("transpose_183")]; tensor var_10105_cast_fp16 = pow(x = var_10062, y = var_10104_promoted_to_fp16)[name = string("op_10105_cast_fp16")]; tensor var_10110_axes_0 = const()[name = string("op_10110_axes_0"), val = tensor([-1])]; bool var_10110_keep_dims_0 = const()[name = string("op_10110_keep_dims_0"), val = bool(true)]; tensor var_10110_cast_fp16 = reduce_mean(axes = var_10110_axes_0, keep_dims = var_10110_keep_dims_0, x = var_10105_cast_fp16)[name = string("op_10110_cast_fp16")]; fp16 var_10112_to_fp16 = const()[name = string("op_10112_to_fp16"), val = fp16(0x1.1p-20)]; tensor mean_sq_25_cast_fp16 = add(x = var_10110_cast_fp16, y = var_10112_to_fp16)[name = string("mean_sq_25_cast_fp16")]; fp16 var_10119_to_fp16 = const()[name = string("op_10119_to_fp16"), val = fp16(-0x1p-1)]; tensor var_10120_cast_fp16 = pow(x = mean_sq_25_cast_fp16, y = var_10119_to_fp16)[name = string("op_10120_cast_fp16")]; tensor var_10121_cast_fp16 = mul(x = var_10062, y = var_10120_cast_fp16)[name = string("op_10121_cast_fp16")]; tensor var_10127 = mul(x = q_101, y = cos_1)[name = string("op_10127")]; tensor var_10128_split_sizes_0 = const()[name = string("op_10128_split_sizes_0"), val = tensor([128, 128])]; int32 var_10128_axis_0 = const()[name = string("op_10128_axis_0"), val = int32(-1)]; tensor var_10128_0, tensor var_10128_1 = split(axis = var_10128_axis_0, split_sizes = var_10128_split_sizes_0, x = q_101)[name = string("op_10128")]; fp16 const_293_promoted = const()[name = string("const_293_promoted"), val = fp16(-0x1p+0)]; tensor var_10130 = mul(x = var_10128_1, y = const_293_promoted)[name = string("op_10130")]; int32 var_10132 = const()[name = string("op_10132"), val = int32(-1)]; bool var_10133_interleave_0 = const()[name = string("op_10133_interleave_0"), val = bool(false)]; tensor var_10133 = concat(axis = var_10132, interleave = var_10133_interleave_0, values = (var_10130, var_10128_0))[name = string("op_10133")]; tensor var_10134 = mul(x = var_10133, y = sin_1)[name = string("op_10134")]; tensor input_433 = add(x = var_10127, y = var_10134)[name = string("input_433")]; tensor var_10139_begin_0 = const()[name = string("op_10139_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_10139_end_0 = const()[name = string("op_10139_end_0"), val = tensor([13, 1, 512, 512])]; tensor var_10139_end_mask_0 = const()[name = string("op_10139_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_10139_squeeze_mask_0 = const()[name = string("op_10139_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_10139_cast_fp16 = slice_by_index(begin = var_10139_begin_0, end = var_10139_end_0, end_mask = var_10139_end_mask_0, squeeze_mask = var_10139_squeeze_mask_0, x = coreml_update_state_53)[name = string("op_10139_cast_fp16")]; tensor K_cache_25_axes_0 = const()[name = string("K_cache_25_axes_0"), val = tensor([0])]; tensor K_cache_25_cast_fp16 = expand_dims(axes = K_cache_25_axes_0, x = var_10139_cast_fp16)[name = string("K_cache_25_cast_fp16")]; tensor var_10144_begin_0 = const()[name = string("op_10144_begin_0"), val = tensor([47, 0, 0, 0])]; tensor var_10144_end_0 = const()[name = string("op_10144_end_0"), val = tensor([48, 1, 512, 512])]; tensor var_10144_end_mask_0 = const()[name = string("op_10144_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_10144_squeeze_mask_0 = const()[name = string("op_10144_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_10144_cast_fp16 = slice_by_index(begin = var_10144_begin_0, end = var_10144_end_0, end_mask = var_10144_end_mask_0, squeeze_mask = var_10144_squeeze_mask_0, x = coreml_update_state_53)[name = string("op_10144_cast_fp16")]; tensor V_cache_25_axes_0 = const()[name = string("V_cache_25_axes_0"), val = tensor([0])]; tensor V_cache_25_cast_fp16 = expand_dims(axes = V_cache_25_axes_0, x = var_10144_cast_fp16)[name = string("V_cache_25_cast_fp16")]; tensor k_padded_21_pad_0 = const()[name = string("k_padded_21_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string k_padded_21_mode_0 = const()[name = string("k_padded_21_mode_0"), val = string("constant")]; fp16 const_294_to_fp16 = const()[name = string("const_294_to_fp16"), val = fp16(0x0p+0)]; tensor k_padded_21_cast_fp16 = pad(constant_val = const_294_to_fp16, mode = k_padded_21_mode_0, pad = k_padded_21_pad_0, x = input_433)[name = string("k_padded_21_cast_fp16")]; tensor v_padded_21_pad_0 = const()[name = string("v_padded_21_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string v_padded_21_mode_0 = const()[name = string("v_padded_21_mode_0"), val = string("constant")]; fp16 const_295_to_fp16 = const()[name = string("const_295_to_fp16"), val = fp16(0x0p+0)]; tensor v_padded_21_cast_fp16 = pad(constant_val = const_295_to_fp16, mode = v_padded_21_mode_0, pad = v_padded_21_pad_0, x = var_10121_cast_fp16)[name = string("v_padded_21_cast_fp16")]; tensor var_10162_cast_fp16 = mul(x = K_cache_25_cast_fp16, y = var_3515_cast_fp16)[name = string("op_10162_cast_fp16")]; tensor var_10163_reps_0 = const()[name = string("op_10163_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_10163_cast_fp16 = tile(reps = var_10163_reps_0, x = k_padded_21_cast_fp16)[name = string("op_10163_cast_fp16")]; tensor var_10164_cast_fp16 = mul(x = var_10163_cast_fp16, y = update_mask)[name = string("op_10164_cast_fp16")]; tensor K_new_25_cast_fp16 = add(x = var_10162_cast_fp16, y = var_10164_cast_fp16)[name = string("K_new_25_cast_fp16")]; tensor var_10170_cast_fp16 = mul(x = V_cache_25_cast_fp16, y = var_3515_cast_fp16)[name = string("op_10170_cast_fp16")]; tensor var_10171_reps_0 = const()[name = string("op_10171_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_10171_cast_fp16 = tile(reps = var_10171_reps_0, x = v_padded_21_cast_fp16)[name = string("op_10171_cast_fp16")]; tensor var_10172_cast_fp16 = mul(x = var_10171_cast_fp16, y = update_mask)[name = string("op_10172_cast_fp16")]; tensor V_new_25_cast_fp16 = add(x = var_10170_cast_fp16, y = var_10172_cast_fp16)[name = string("V_new_25_cast_fp16")]; tensor var_10176_axes_0 = const()[name = string("op_10176_axes_0"), val = tensor([0])]; tensor var_10176_cast_fp16 = squeeze(axes = var_10176_axes_0, x = K_new_25_cast_fp16)[name = string("op_10176_cast_fp16")]; tensor concat_96 = const()[name = string("concat_96"), val = tensor([12, 0, 0, 0])]; tensor concat_97 = const()[name = string("concat_97"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_25_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_25_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_25_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_96, begin_mask = kv_cache_0_internal_tensor_assign_25_begin_mask_0, end = concat_97, end_mask = kv_cache_0_internal_tensor_assign_25_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_25_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_25_stride_0, update = var_10176_cast_fp16, x = coreml_update_state_53)[name = string("kv_cache_0_internal_tensor_assign_25_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_25_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_54_write_state")]; tensor coreml_update_state_54 = read_state(input = kv_cache_0)[name = string("coreml_update_state_54")]; tensor var_10183_axes_0 = const()[name = string("op_10183_axes_0"), val = tensor([0])]; tensor var_10183_cast_fp16 = squeeze(axes = var_10183_axes_0, x = V_new_25_cast_fp16)[name = string("op_10183_cast_fp16")]; tensor concat_98 = const()[name = string("concat_98"), val = tensor([47, 0, 0, 0])]; tensor concat_99 = const()[name = string("concat_99"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_26_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_26_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_26_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_98, begin_mask = kv_cache_0_internal_tensor_assign_26_begin_mask_0, end = concat_99, end_mask = kv_cache_0_internal_tensor_assign_26_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_26_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_26_stride_0, update = var_10183_cast_fp16, x = coreml_update_state_54)[name = string("kv_cache_0_internal_tensor_assign_26_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_26_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_55_write_state")]; tensor coreml_update_state_55 = read_state(input = kv_cache_0)[name = string("coreml_update_state_55")]; tensor K_for_attn_25_begin_0 = const()[name = string("K_for_attn_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor K_for_attn_25_end_0 = const()[name = string("K_for_attn_25_end_0"), val = tensor([1, 1, 512, 256])]; tensor K_for_attn_25_end_mask_0 = const()[name = string("K_for_attn_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor K_for_attn_25_cast_fp16 = slice_by_index(begin = K_for_attn_25_begin_0, end = K_for_attn_25_end_0, end_mask = K_for_attn_25_end_mask_0, x = K_new_25_cast_fp16)[name = string("K_for_attn_25_cast_fp16")]; tensor V_for_attn_25_begin_0 = const()[name = string("V_for_attn_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor V_for_attn_25_end_0 = const()[name = string("V_for_attn_25_end_0"), val = tensor([1, 1, 512, 256])]; tensor V_for_attn_25_end_mask_0 = const()[name = string("V_for_attn_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor V_for_attn_25_cast_fp16 = slice_by_index(begin = V_for_attn_25_begin_0, end = V_for_attn_25_end_0, end_mask = V_for_attn_25_end_mask_0, x = V_new_25_cast_fp16)[name = string("V_for_attn_25_cast_fp16")]; tensor transpose_48_perm_0 = const()[name = string("transpose_48_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_24_reps_0 = const()[name = string("tile_24_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_48_cast_fp16 = transpose(perm = transpose_48_perm_0, x = K_for_attn_25_cast_fp16)[name = string("transpose_182")]; tensor tile_24_cast_fp16 = tile(reps = tile_24_reps_0, x = transpose_48_cast_fp16)[name = string("tile_24_cast_fp16")]; tensor concat_100 = const()[name = string("concat_100"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_48_cast_fp16 = reshape(shape = concat_100, x = tile_24_cast_fp16)[name = string("reshape_48_cast_fp16")]; tensor transpose_49_perm_0 = const()[name = string("transpose_49_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_101 = const()[name = string("concat_101"), val = tensor([-1, 1, 512, 256])]; tensor transpose_49_cast_fp16 = transpose(perm = transpose_49_perm_0, x = reshape_48_cast_fp16)[name = string("transpose_181")]; tensor reshape_49_cast_fp16 = reshape(shape = concat_101, x = transpose_49_cast_fp16)[name = string("reshape_49_cast_fp16")]; tensor transpose_152_perm_0 = const()[name = string("transpose_152_perm_0"), val = tensor([1, 0, -1, -2])]; tensor transpose_50_perm_0 = const()[name = string("transpose_50_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_25_reps_0 = const()[name = string("tile_25_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_50_cast_fp16 = transpose(perm = transpose_50_perm_0, x = V_for_attn_25_cast_fp16)[name = string("transpose_180")]; tensor tile_25_cast_fp16 = tile(reps = tile_25_reps_0, x = transpose_50_cast_fp16)[name = string("tile_25_cast_fp16")]; tensor concat_102 = const()[name = string("concat_102"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_50_cast_fp16 = reshape(shape = concat_102, x = tile_25_cast_fp16)[name = string("reshape_50_cast_fp16")]; tensor transpose_51_perm_0 = const()[name = string("transpose_51_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_103 = const()[name = string("concat_103"), val = tensor([-1, 1, 512, 256])]; tensor transpose_51_cast_fp16 = transpose(perm = transpose_51_perm_0, x = reshape_50_cast_fp16)[name = string("transpose_179")]; tensor reshape_51_cast_fp16 = reshape(shape = concat_103, x = transpose_51_cast_fp16)[name = string("reshape_51_cast_fp16")]; tensor V_expanded_25_perm_0 = const()[name = string("V_expanded_25_perm_0"), val = tensor([1, 0, -2, -1])]; bool var_10220_transpose_x_0 = const()[name = string("op_10220_transpose_x_0"), val = bool(false)]; bool var_10220_transpose_y_0 = const()[name = string("op_10220_transpose_y_0"), val = bool(false)]; tensor transpose_152_cast_fp16 = transpose(perm = transpose_152_perm_0, x = reshape_49_cast_fp16)[name = string("transpose_178")]; tensor var_10220_cast_fp16 = matmul(transpose_x = var_10220_transpose_x_0, transpose_y = var_10220_transpose_y_0, x = q_103, y = transpose_152_cast_fp16)[name = string("op_10220_cast_fp16")]; tensor attn_weights_75_cast_fp16 = add(x = var_10220_cast_fp16, y = causal_mask)[name = string("attn_weights_75_cast_fp16")]; int32 var_10230 = const()[name = string("op_10230"), val = int32(-1)]; tensor var_10232_cast_fp16 = softmax(axis = var_10230, x = attn_weights_75_cast_fp16)[name = string("op_10232_cast_fp16")]; bool var_10248_transpose_x_0 = const()[name = string("op_10248_transpose_x_0"), val = bool(false)]; bool var_10248_transpose_y_0 = const()[name = string("op_10248_transpose_y_0"), val = bool(false)]; tensor V_expanded_25_cast_fp16 = transpose(perm = V_expanded_25_perm_0, x = reshape_51_cast_fp16)[name = string("transpose_177")]; tensor var_10248_cast_fp16 = matmul(transpose_x = var_10248_transpose_x_0, transpose_y = var_10248_transpose_y_0, x = var_10232_cast_fp16, y = V_expanded_25_cast_fp16)[name = string("op_10248_cast_fp16")]; tensor var_10258 = const()[name = string("op_10258"), val = tensor([0, 2, 1, 3])]; tensor var_10265 = const()[name = string("op_10265"), val = tensor([1, 1, -1])]; tensor var_10259 = transpose(perm = var_10258, x = var_10248_cast_fp16)[name = string("transpose_176")]; tensor attn_output_75 = reshape(shape = var_10265, x = var_10259)[name = string("attn_output_75")]; tensor var_10270 = const()[name = string("op_10270"), val = tensor([0, 2, 1])]; tensor squeeze_12_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2272591616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2274164544))))[name = string("squeeze_12_palettized")]; string var_10286_pad_type_0 = const()[name = string("op_10286_pad_type_0"), val = string("valid")]; int32 var_10286_groups_0 = const()[name = string("op_10286_groups_0"), val = int32(1)]; tensor var_10286_strides_0 = const()[name = string("op_10286_strides_0"), val = tensor([1])]; tensor var_10286_pad_0 = const()[name = string("op_10286_pad_0"), val = tensor([0, 0])]; tensor var_10286_dilations_0 = const()[name = string("op_10286_dilations_0"), val = tensor([1])]; tensor var_10271 = transpose(perm = var_10270, x = attn_output_75)[name = string("transpose_175")]; tensor var_10286 = conv(dilations = var_10286_dilations_0, groups = var_10286_groups_0, pad = var_10286_pad_0, pad_type = var_10286_pad_type_0, strides = var_10286_strides_0, weight = squeeze_12_palettized, x = var_10271)[name = string("op_10286")]; tensor var_10290 = const()[name = string("op_10290"), val = tensor([0, 2, 1])]; int32 var_10296 = const()[name = string("op_10296"), val = int32(-1)]; fp16 const_296_promoted_to_fp16 = const()[name = string("const_296_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_513 = transpose(perm = var_10290, x = var_10286)[name = string("transpose_174")]; tensor var_10302_cast_fp16 = mul(x = x_513, y = const_296_promoted_to_fp16)[name = string("op_10302_cast_fp16")]; bool input_439_interleave_0 = const()[name = string("input_439_interleave_0"), val = bool(false)]; tensor input_439_cast_fp16 = concat(axis = var_10296, interleave = input_439_interleave_0, values = (x_513, var_10302_cast_fp16))[name = string("input_439_cast_fp16")]; tensor normed_489_axes_0 = const()[name = string("normed_489_axes_0"), val = tensor([-1])]; fp16 var_10294_to_fp16 = const()[name = string("op_10294_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_489_cast_fp16 = layer_norm(axes = normed_489_axes_0, epsilon = var_10294_to_fp16, x = input_439_cast_fp16)[name = string("normed_489_cast_fp16")]; tensor var_10307_split_sizes_0 = const()[name = string("op_10307_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_10307_axis_0 = const()[name = string("op_10307_axis_0"), val = int32(-1)]; tensor var_10307_cast_fp16_0, tensor var_10307_cast_fp16_1 = split(axis = var_10307_axis_0, split_sizes = var_10307_split_sizes_0, x = normed_489_cast_fp16)[name = string("op_10307_cast_fp16")]; tensor const_297_to_fp16 = const()[name = string("const_297_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2274166144)))]; tensor var_10310_cast_fp16 = mul(x = var_10307_cast_fp16_0, y = const_297_to_fp16)[name = string("op_10310_cast_fp16")]; tensor x_517_cast_fp16 = add(x = x_499_cast_fp16, y = var_10310_cast_fp16)[name = string("x_517_cast_fp16")]; int32 var_10317 = const()[name = string("op_10317"), val = int32(-1)]; fp16 const_298_promoted_to_fp16 = const()[name = string("const_298_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10323_cast_fp16 = mul(x = x_517_cast_fp16, y = const_298_promoted_to_fp16)[name = string("op_10323_cast_fp16")]; bool input_441_interleave_0 = const()[name = string("input_441_interleave_0"), val = bool(false)]; tensor input_441_cast_fp16 = concat(axis = var_10317, interleave = input_441_interleave_0, values = (x_517_cast_fp16, var_10323_cast_fp16))[name = string("input_441_cast_fp16")]; tensor normed_493_axes_0 = const()[name = string("normed_493_axes_0"), val = tensor([-1])]; fp16 var_10315_to_fp16 = const()[name = string("op_10315_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_493_cast_fp16 = layer_norm(axes = normed_493_axes_0, epsilon = var_10315_to_fp16, x = input_441_cast_fp16)[name = string("normed_493_cast_fp16")]; tensor var_10328_split_sizes_0 = const()[name = string("op_10328_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_10328_axis_0 = const()[name = string("op_10328_axis_0"), val = int32(-1)]; tensor var_10328_cast_fp16_0, tensor var_10328_cast_fp16_1 = split(axis = var_10328_axis_0, split_sizes = var_10328_split_sizes_0, x = normed_493_cast_fp16)[name = string("op_10328_cast_fp16")]; tensor const_299_to_fp16 = const()[name = string("const_299_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2274169280)))]; tensor var_10331_cast_fp16 = mul(x = var_10328_cast_fp16_0, y = const_299_to_fp16)[name = string("op_10331_cast_fp16")]; tensor var_10344 = const()[name = string("op_10344"), val = tensor([0, 2, 1])]; tensor input_443_axes_0 = const()[name = string("input_443_axes_0"), val = tensor([2])]; tensor var_10345 = transpose(perm = var_10344, x = var_10331_cast_fp16)[name = string("transpose_173")]; tensor input_443 = expand_dims(axes = input_443_axes_0, x = var_10345)[name = string("input_443")]; string gate_49_pad_type_0 = const()[name = string("gate_49_pad_type_0"), val = string("valid")]; tensor gate_49_strides_0 = const()[name = string("gate_49_strides_0"), val = tensor([1, 1])]; tensor gate_49_pad_0 = const()[name = string("gate_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_49_dilations_0 = const()[name = string("gate_49_dilations_0"), val = tensor([1, 1])]; int32 gate_49_groups_0 = const()[name = string("gate_49_groups_0"), val = int32(1)]; tensor gate_49 = conv(dilations = gate_49_dilations_0, groups = gate_49_groups_0, pad = gate_49_pad_0, pad_type = gate_49_pad_type_0, strides = gate_49_strides_0, weight = layers_12_mlp_gate_proj_weight_palettized, x = input_443)[name = string("gate_49")]; string up_25_pad_type_0 = const()[name = string("up_25_pad_type_0"), val = string("valid")]; tensor up_25_strides_0 = const()[name = string("up_25_strides_0"), val = tensor([1, 1])]; tensor up_25_pad_0 = const()[name = string("up_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_25_dilations_0 = const()[name = string("up_25_dilations_0"), val = tensor([1, 1])]; int32 up_25_groups_0 = const()[name = string("up_25_groups_0"), val = int32(1)]; tensor up_25 = conv(dilations = up_25_dilations_0, groups = up_25_groups_0, pad = up_25_pad_0, pad_type = up_25_pad_type_0, strides = up_25_strides_0, weight = layers_12_mlp_up_proj_weight_palettized, x = input_443)[name = string("up_25")]; string gate_51_mode_0 = const()[name = string("gate_51_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_51 = gelu(mode = gate_51_mode_0, x = gate_49)[name = string("gate_51")]; tensor input_445 = mul(x = gate_51, y = up_25)[name = string("input_445")]; string mlp_out_25_pad_type_0 = const()[name = string("mlp_out_25_pad_type_0"), val = string("valid")]; tensor mlp_out_25_strides_0 = const()[name = string("mlp_out_25_strides_0"), val = tensor([1, 1])]; tensor mlp_out_25_pad_0 = const()[name = string("mlp_out_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_25_dilations_0 = const()[name = string("mlp_out_25_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_25_groups_0 = const()[name = string("mlp_out_25_groups_0"), val = int32(1)]; tensor mlp_out_25 = conv(dilations = mlp_out_25_dilations_0, groups = mlp_out_25_groups_0, pad = mlp_out_25_pad_0, pad_type = mlp_out_25_pad_type_0, strides = mlp_out_25_strides_0, weight = layers_12_mlp_down_proj_weight_palettized, x = input_445)[name = string("mlp_out_25")]; tensor var_10385_axes_0 = const()[name = string("op_10385_axes_0"), val = tensor([2])]; tensor var_10385 = squeeze(axes = var_10385_axes_0, x = mlp_out_25)[name = string("op_10385")]; tensor var_10389 = const()[name = string("op_10389"), val = tensor([0, 2, 1])]; int32 var_10395 = const()[name = string("op_10395"), val = int32(-1)]; fp16 const_300_promoted_to_fp16 = const()[name = string("const_300_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_521 = transpose(perm = var_10389, x = var_10385)[name = string("transpose_172")]; tensor var_10401_cast_fp16 = mul(x = x_521, y = const_300_promoted_to_fp16)[name = string("op_10401_cast_fp16")]; bool input_447_interleave_0 = const()[name = string("input_447_interleave_0"), val = bool(false)]; tensor input_447_cast_fp16 = concat(axis = var_10395, interleave = input_447_interleave_0, values = (x_521, var_10401_cast_fp16))[name = string("input_447_cast_fp16")]; tensor normed_497_axes_0 = const()[name = string("normed_497_axes_0"), val = tensor([-1])]; fp16 var_10393_to_fp16 = const()[name = string("op_10393_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_497_cast_fp16 = layer_norm(axes = normed_497_axes_0, epsilon = var_10393_to_fp16, x = input_447_cast_fp16)[name = string("normed_497_cast_fp16")]; tensor var_10406_split_sizes_0 = const()[name = string("op_10406_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_10406_axis_0 = const()[name = string("op_10406_axis_0"), val = int32(-1)]; tensor var_10406_cast_fp16_0, tensor var_10406_cast_fp16_1 = split(axis = var_10406_axis_0, split_sizes = var_10406_split_sizes_0, x = normed_497_cast_fp16)[name = string("op_10406_cast_fp16")]; tensor const_301_to_fp16 = const()[name = string("const_301_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2274172416)))]; tensor var_10409_cast_fp16 = mul(x = var_10406_cast_fp16_0, y = const_301_to_fp16)[name = string("op_10409_cast_fp16")]; tensor hidden_states_157_cast_fp16 = add(x = x_517_cast_fp16, y = var_10409_cast_fp16)[name = string("hidden_states_157_cast_fp16")]; tensor per_layer_slice_25_begin_0 = const()[name = string("per_layer_slice_25_begin_0"), val = tensor([0, 0, 3072])]; tensor per_layer_slice_25_end_0 = const()[name = string("per_layer_slice_25_end_0"), val = tensor([1, 1, 3328])]; tensor per_layer_slice_25_end_mask_0 = const()[name = string("per_layer_slice_25_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_25 = slice_by_index(begin = per_layer_slice_25_begin_0, end = per_layer_slice_25_end_0, end_mask = per_layer_slice_25_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_25")]; tensor gated_49 = linear(bias = linear_1_bias_0, weight = layers_12_per_layer_input_gate_weight_palettized, x = hidden_states_157_cast_fp16)[name = string("linear_25")]; string gated_51_mode_0 = const()[name = string("gated_51_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_51 = gelu(mode = gated_51_mode_0, x = gated_49)[name = string("gated_51")]; tensor input_451 = mul(x = gated_51, y = per_layer_slice_25)[name = string("input_451")]; tensor x_525 = linear(bias = linear_2_bias_0, weight = layers_12_per_layer_projection_weight_palettized, x = input_451)[name = string("linear_26")]; int32 var_10446 = const()[name = string("op_10446"), val = int32(-1)]; fp16 const_302_promoted_to_fp16 = const()[name = string("const_302_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10452_cast_fp16 = mul(x = x_525, y = const_302_promoted_to_fp16)[name = string("op_10452_cast_fp16")]; bool input_453_interleave_0 = const()[name = string("input_453_interleave_0"), val = bool(false)]; tensor input_453_cast_fp16 = concat(axis = var_10446, interleave = input_453_interleave_0, values = (x_525, var_10452_cast_fp16))[name = string("input_453_cast_fp16")]; tensor normed_501_axes_0 = const()[name = string("normed_501_axes_0"), val = tensor([-1])]; fp16 var_10444_to_fp16 = const()[name = string("op_10444_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_501_cast_fp16 = layer_norm(axes = normed_501_axes_0, epsilon = var_10444_to_fp16, x = input_453_cast_fp16)[name = string("normed_501_cast_fp16")]; tensor var_10457_split_sizes_0 = const()[name = string("op_10457_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_10457_axis_0 = const()[name = string("op_10457_axis_0"), val = int32(-1)]; tensor var_10457_cast_fp16_0, tensor var_10457_cast_fp16_1 = split(axis = var_10457_axis_0, split_sizes = var_10457_split_sizes_0, x = normed_501_cast_fp16)[name = string("op_10457_cast_fp16")]; tensor const_303_to_fp16 = const()[name = string("const_303_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2274175552)))]; tensor var_10460_cast_fp16 = mul(x = var_10457_cast_fp16_0, y = const_303_to_fp16)[name = string("op_10460_cast_fp16")]; tensor hidden_states_161_cast_fp16 = add(x = hidden_states_157_cast_fp16, y = var_10460_cast_fp16)[name = string("hidden_states_161_cast_fp16")]; tensor layers_12_layer_scalar_to_fp16 = const()[name = string("layers_12_layer_scalar_to_fp16"), val = tensor([0x1.4cp-2])]; tensor x_529_cast_fp16 = mul(x = hidden_states_161_cast_fp16, y = layers_12_layer_scalar_to_fp16)[name = string("x_529_cast_fp16")]; int32 var_10468 = const()[name = string("op_10468"), val = int32(-1)]; fp16 const_304_promoted_to_fp16 = const()[name = string("const_304_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10474_cast_fp16 = mul(x = x_529_cast_fp16, y = const_304_promoted_to_fp16)[name = string("op_10474_cast_fp16")]; bool input_455_interleave_0 = const()[name = string("input_455_interleave_0"), val = bool(false)]; tensor input_455_cast_fp16 = concat(axis = var_10468, interleave = input_455_interleave_0, values = (x_529_cast_fp16, var_10474_cast_fp16))[name = string("input_455_cast_fp16")]; tensor normed_505_axes_0 = const()[name = string("normed_505_axes_0"), val = tensor([-1])]; fp16 var_10466_to_fp16 = const()[name = string("op_10466_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_505_cast_fp16 = layer_norm(axes = normed_505_axes_0, epsilon = var_10466_to_fp16, x = input_455_cast_fp16)[name = string("normed_505_cast_fp16")]; tensor var_10479_split_sizes_0 = const()[name = string("op_10479_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_10479_axis_0 = const()[name = string("op_10479_axis_0"), val = int32(-1)]; tensor var_10479_cast_fp16_0, tensor var_10479_cast_fp16_1 = split(axis = var_10479_axis_0, split_sizes = var_10479_split_sizes_0, x = normed_505_cast_fp16)[name = string("op_10479_cast_fp16")]; tensor const_305_to_fp16 = const()[name = string("const_305_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2274178688)))]; tensor var_10482_cast_fp16 = mul(x = var_10479_cast_fp16_0, y = const_305_to_fp16)[name = string("op_10482_cast_fp16")]; tensor var_10490 = const()[name = string("op_10490"), val = tensor([0, 2, 1])]; tensor var_10493_axes_0 = const()[name = string("op_10493_axes_0"), val = tensor([2])]; tensor var_10491_cast_fp16 = transpose(perm = var_10490, x = var_10482_cast_fp16)[name = string("transpose_171")]; tensor var_10493_cast_fp16 = expand_dims(axes = var_10493_axes_0, x = var_10491_cast_fp16)[name = string("op_10493_cast_fp16")]; string var_10509_pad_type_0 = const()[name = string("op_10509_pad_type_0"), val = string("valid")]; tensor var_10509_strides_0 = const()[name = string("op_10509_strides_0"), val = tensor([1, 1])]; tensor var_10509_pad_0 = const()[name = string("op_10509_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_10509_dilations_0 = const()[name = string("op_10509_dilations_0"), val = tensor([1, 1])]; int32 var_10509_groups_0 = const()[name = string("op_10509_groups_0"), val = int32(1)]; tensor var_10509 = conv(dilations = var_10509_dilations_0, groups = var_10509_groups_0, pad = var_10509_pad_0, pad_type = var_10509_pad_type_0, strides = var_10509_strides_0, weight = layers_13_self_attn_q_proj_weight_palettized, x = var_10493_cast_fp16)[name = string("op_10509")]; tensor var_10514 = const()[name = string("op_10514"), val = tensor([1, 8, 256, 1])]; tensor var_10515 = reshape(shape = var_10514, x = var_10509)[name = string("op_10515")]; tensor var_10520 = const()[name = string("op_10520"), val = tensor([0, 1, 3, 2])]; tensor var_10530 = const()[name = string("op_10530"), val = tensor([1, 8, 256])]; tensor var_10521 = transpose(perm = var_10520, x = var_10515)[name = string("transpose_170")]; tensor x_533 = reshape(shape = var_10530, x = var_10521)[name = string("x_533")]; int32 var_10536 = const()[name = string("op_10536"), val = int32(-1)]; fp16 const_306_promoted_to_fp16 = const()[name = string("const_306_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10542_cast_fp16 = mul(x = x_533, y = const_306_promoted_to_fp16)[name = string("op_10542_cast_fp16")]; bool input_459_interleave_0 = const()[name = string("input_459_interleave_0"), val = bool(false)]; tensor input_459_cast_fp16 = concat(axis = var_10536, interleave = input_459_interleave_0, values = (x_533, var_10542_cast_fp16))[name = string("input_459_cast_fp16")]; tensor normed_509_axes_0 = const()[name = string("normed_509_axes_0"), val = tensor([-1])]; fp16 var_10534_to_fp16 = const()[name = string("op_10534_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_509_cast_fp16 = layer_norm(axes = normed_509_axes_0, epsilon = var_10534_to_fp16, x = input_459_cast_fp16)[name = string("normed_509_cast_fp16")]; tensor var_10547_split_sizes_0 = const()[name = string("op_10547_split_sizes_0"), val = tensor([256, 256])]; int32 var_10547_axis_0 = const()[name = string("op_10547_axis_0"), val = int32(-1)]; tensor var_10547_cast_fp16_0, tensor var_10547_cast_fp16_1 = split(axis = var_10547_axis_0, split_sizes = var_10547_split_sizes_0, x = normed_509_cast_fp16)[name = string("op_10547_cast_fp16")]; tensor const_307_to_fp16 = const()[name = string("const_307_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2274181824)))]; tensor var_10550_cast_fp16 = mul(x = var_10547_cast_fp16_0, y = const_307_to_fp16)[name = string("op_10550_cast_fp16")]; tensor var_10556 = const()[name = string("op_10556"), val = tensor([1, 8, 1, 256])]; tensor q_107 = reshape(shape = var_10556, x = var_10550_cast_fp16)[name = string("q_107")]; tensor var_10558 = mul(x = q_107, y = cos_1)[name = string("op_10558")]; tensor var_10559_split_sizes_0 = const()[name = string("op_10559_split_sizes_0"), val = tensor([128, 128])]; int32 var_10559_axis_0 = const()[name = string("op_10559_axis_0"), val = int32(-1)]; tensor var_10559_0, tensor var_10559_1 = split(axis = var_10559_axis_0, split_sizes = var_10559_split_sizes_0, x = q_107)[name = string("op_10559")]; fp16 const_308_promoted = const()[name = string("const_308_promoted"), val = fp16(-0x1p+0)]; tensor var_10561 = mul(x = var_10559_1, y = const_308_promoted)[name = string("op_10561")]; int32 var_10563 = const()[name = string("op_10563"), val = int32(-1)]; bool var_10564_interleave_0 = const()[name = string("op_10564_interleave_0"), val = bool(false)]; tensor var_10564 = concat(axis = var_10563, interleave = var_10564_interleave_0, values = (var_10561, var_10559_0))[name = string("op_10564")]; tensor var_10565 = mul(x = var_10564, y = sin_1)[name = string("op_10565")]; tensor q_111 = add(x = var_10558, y = var_10565)[name = string("q_111")]; string var_10578_pad_type_0 = const()[name = string("op_10578_pad_type_0"), val = string("valid")]; tensor var_10578_strides_0 = const()[name = string("op_10578_strides_0"), val = tensor([1, 1])]; tensor var_10578_pad_0 = const()[name = string("op_10578_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_10578_dilations_0 = const()[name = string("op_10578_dilations_0"), val = tensor([1, 1])]; int32 var_10578_groups_0 = const()[name = string("op_10578_groups_0"), val = int32(1)]; tensor var_10578 = conv(dilations = var_10578_dilations_0, groups = var_10578_groups_0, pad = var_10578_pad_0, pad_type = var_10578_pad_type_0, strides = var_10578_strides_0, weight = layers_13_self_attn_k_proj_weight_palettized, x = var_10493_cast_fp16)[name = string("op_10578")]; tensor var_10583 = const()[name = string("op_10583"), val = tensor([1, 1, 256, 1])]; tensor var_10584 = reshape(shape = var_10583, x = var_10578)[name = string("op_10584")]; tensor var_10589 = const()[name = string("op_10589"), val = tensor([0, 1, 3, 2])]; string var_10606_pad_type_0 = const()[name = string("op_10606_pad_type_0"), val = string("valid")]; tensor var_10606_strides_0 = const()[name = string("op_10606_strides_0"), val = tensor([1, 1])]; tensor var_10606_pad_0 = const()[name = string("op_10606_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_10606_dilations_0 = const()[name = string("op_10606_dilations_0"), val = tensor([1, 1])]; int32 var_10606_groups_0 = const()[name = string("op_10606_groups_0"), val = int32(1)]; tensor var_10606 = conv(dilations = var_10606_dilations_0, groups = var_10606_groups_0, pad = var_10606_pad_0, pad_type = var_10606_pad_type_0, strides = var_10606_strides_0, weight = layers_13_self_attn_v_proj_weight_palettized, x = var_10493_cast_fp16)[name = string("op_10606")]; tensor var_10611 = const()[name = string("op_10611"), val = tensor([1, 1, 256, 1])]; tensor var_10612 = reshape(shape = var_10611, x = var_10606)[name = string("op_10612")]; tensor var_10617 = const()[name = string("op_10617"), val = tensor([0, 1, 3, 2])]; tensor var_10627 = const()[name = string("op_10627"), val = tensor([1, 1, 256])]; tensor var_10590 = transpose(perm = var_10589, x = var_10584)[name = string("transpose_169")]; tensor x_537 = reshape(shape = var_10627, x = var_10590)[name = string("x_537")]; int32 var_10633 = const()[name = string("op_10633"), val = int32(-1)]; fp16 const_309_promoted_to_fp16 = const()[name = string("const_309_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10639_cast_fp16 = mul(x = x_537, y = const_309_promoted_to_fp16)[name = string("op_10639_cast_fp16")]; bool input_461_interleave_0 = const()[name = string("input_461_interleave_0"), val = bool(false)]; tensor input_461_cast_fp16 = concat(axis = var_10633, interleave = input_461_interleave_0, values = (x_537, var_10639_cast_fp16))[name = string("input_461_cast_fp16")]; tensor normed_513_axes_0 = const()[name = string("normed_513_axes_0"), val = tensor([-1])]; fp16 var_10631_to_fp16 = const()[name = string("op_10631_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_513_cast_fp16 = layer_norm(axes = normed_513_axes_0, epsilon = var_10631_to_fp16, x = input_461_cast_fp16)[name = string("normed_513_cast_fp16")]; tensor var_10644_split_sizes_0 = const()[name = string("op_10644_split_sizes_0"), val = tensor([256, 256])]; int32 var_10644_axis_0 = const()[name = string("op_10644_axis_0"), val = int32(-1)]; tensor var_10644_cast_fp16_0, tensor var_10644_cast_fp16_1 = split(axis = var_10644_axis_0, split_sizes = var_10644_split_sizes_0, x = normed_513_cast_fp16)[name = string("op_10644_cast_fp16")]; tensor const_310_to_fp16 = const()[name = string("const_310_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2274182400)))]; tensor var_10647_cast_fp16 = mul(x = var_10644_cast_fp16_0, y = const_310_to_fp16)[name = string("op_10647_cast_fp16")]; tensor var_10653 = const()[name = string("op_10653"), val = tensor([1, 1, 1, 256])]; tensor q_109 = reshape(shape = var_10653, x = var_10647_cast_fp16)[name = string("q_109")]; fp16 var_10660_promoted_to_fp16 = const()[name = string("op_10660_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10618 = transpose(perm = var_10617, x = var_10612)[name = string("transpose_168")]; tensor var_10661_cast_fp16 = pow(x = var_10618, y = var_10660_promoted_to_fp16)[name = string("op_10661_cast_fp16")]; tensor var_10666_axes_0 = const()[name = string("op_10666_axes_0"), val = tensor([-1])]; bool var_10666_keep_dims_0 = const()[name = string("op_10666_keep_dims_0"), val = bool(true)]; tensor var_10666_cast_fp16 = reduce_mean(axes = var_10666_axes_0, keep_dims = var_10666_keep_dims_0, x = var_10661_cast_fp16)[name = string("op_10666_cast_fp16")]; fp16 var_10668_to_fp16 = const()[name = string("op_10668_to_fp16"), val = fp16(0x1.1p-20)]; tensor mean_sq_27_cast_fp16 = add(x = var_10666_cast_fp16, y = var_10668_to_fp16)[name = string("mean_sq_27_cast_fp16")]; fp16 var_10675_to_fp16 = const()[name = string("op_10675_to_fp16"), val = fp16(-0x1p-1)]; tensor var_10676_cast_fp16 = pow(x = mean_sq_27_cast_fp16, y = var_10675_to_fp16)[name = string("op_10676_cast_fp16")]; tensor var_10677_cast_fp16 = mul(x = var_10618, y = var_10676_cast_fp16)[name = string("op_10677_cast_fp16")]; tensor var_10683 = mul(x = q_109, y = cos_1)[name = string("op_10683")]; tensor var_10684_split_sizes_0 = const()[name = string("op_10684_split_sizes_0"), val = tensor([128, 128])]; int32 var_10684_axis_0 = const()[name = string("op_10684_axis_0"), val = int32(-1)]; tensor var_10684_0, tensor var_10684_1 = split(axis = var_10684_axis_0, split_sizes = var_10684_split_sizes_0, x = q_109)[name = string("op_10684")]; fp16 const_311_promoted = const()[name = string("const_311_promoted"), val = fp16(-0x1p+0)]; tensor var_10686 = mul(x = var_10684_1, y = const_311_promoted)[name = string("op_10686")]; int32 var_10688 = const()[name = string("op_10688"), val = int32(-1)]; bool var_10689_interleave_0 = const()[name = string("op_10689_interleave_0"), val = bool(false)]; tensor var_10689 = concat(axis = var_10688, interleave = var_10689_interleave_0, values = (var_10686, var_10684_0))[name = string("op_10689")]; tensor var_10690 = mul(x = var_10689, y = sin_1)[name = string("op_10690")]; tensor input_463 = add(x = var_10683, y = var_10690)[name = string("input_463")]; tensor var_10695_begin_0 = const()[name = string("op_10695_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_10695_end_0 = const()[name = string("op_10695_end_0"), val = tensor([14, 1, 512, 512])]; tensor var_10695_end_mask_0 = const()[name = string("op_10695_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_10695_squeeze_mask_0 = const()[name = string("op_10695_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_10695_cast_fp16 = slice_by_index(begin = var_10695_begin_0, end = var_10695_end_0, end_mask = var_10695_end_mask_0, squeeze_mask = var_10695_squeeze_mask_0, x = coreml_update_state_55)[name = string("op_10695_cast_fp16")]; tensor K_cache_27_axes_0 = const()[name = string("K_cache_27_axes_0"), val = tensor([0])]; tensor K_cache_27_cast_fp16 = expand_dims(axes = K_cache_27_axes_0, x = var_10695_cast_fp16)[name = string("K_cache_27_cast_fp16")]; tensor var_10700_begin_0 = const()[name = string("op_10700_begin_0"), val = tensor([48, 0, 0, 0])]; tensor var_10700_end_0 = const()[name = string("op_10700_end_0"), val = tensor([49, 1, 512, 512])]; tensor var_10700_end_mask_0 = const()[name = string("op_10700_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_10700_squeeze_mask_0 = const()[name = string("op_10700_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_10700_cast_fp16 = slice_by_index(begin = var_10700_begin_0, end = var_10700_end_0, end_mask = var_10700_end_mask_0, squeeze_mask = var_10700_squeeze_mask_0, x = coreml_update_state_55)[name = string("op_10700_cast_fp16")]; tensor V_cache_27_axes_0 = const()[name = string("V_cache_27_axes_0"), val = tensor([0])]; tensor V_cache_27_cast_fp16 = expand_dims(axes = V_cache_27_axes_0, x = var_10700_cast_fp16)[name = string("V_cache_27_cast_fp16")]; tensor k_padded_pad_0 = const()[name = string("k_padded_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string k_padded_mode_0 = const()[name = string("k_padded_mode_0"), val = string("constant")]; fp16 const_312_to_fp16 = const()[name = string("const_312_to_fp16"), val = fp16(0x0p+0)]; tensor k_padded_cast_fp16 = pad(constant_val = const_312_to_fp16, mode = k_padded_mode_0, pad = k_padded_pad_0, x = input_463)[name = string("k_padded_cast_fp16")]; tensor v_padded_pad_0 = const()[name = string("v_padded_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; string v_padded_mode_0 = const()[name = string("v_padded_mode_0"), val = string("constant")]; fp16 const_313_to_fp16 = const()[name = string("const_313_to_fp16"), val = fp16(0x0p+0)]; tensor v_padded_cast_fp16 = pad(constant_val = const_313_to_fp16, mode = v_padded_mode_0, pad = v_padded_pad_0, x = var_10677_cast_fp16)[name = string("v_padded_cast_fp16")]; tensor var_10718_cast_fp16 = mul(x = K_cache_27_cast_fp16, y = var_3515_cast_fp16)[name = string("op_10718_cast_fp16")]; tensor var_10719_reps_0 = const()[name = string("op_10719_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_10719_cast_fp16 = tile(reps = var_10719_reps_0, x = k_padded_cast_fp16)[name = string("op_10719_cast_fp16")]; tensor var_10720_cast_fp16 = mul(x = var_10719_cast_fp16, y = update_mask)[name = string("op_10720_cast_fp16")]; tensor K_new_27_cast_fp16 = add(x = var_10718_cast_fp16, y = var_10720_cast_fp16)[name = string("K_new_27_cast_fp16")]; tensor var_10726_cast_fp16 = mul(x = V_cache_27_cast_fp16, y = var_3515_cast_fp16)[name = string("op_10726_cast_fp16")]; tensor var_10727_reps_0 = const()[name = string("op_10727_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_10727_cast_fp16 = tile(reps = var_10727_reps_0, x = v_padded_cast_fp16)[name = string("op_10727_cast_fp16")]; tensor var_10728_cast_fp16 = mul(x = var_10727_cast_fp16, y = update_mask)[name = string("op_10728_cast_fp16")]; tensor V_new_27_cast_fp16 = add(x = var_10726_cast_fp16, y = var_10728_cast_fp16)[name = string("V_new_27_cast_fp16")]; tensor var_10732_axes_0 = const()[name = string("op_10732_axes_0"), val = tensor([0])]; tensor var_10732_cast_fp16 = squeeze(axes = var_10732_axes_0, x = K_new_27_cast_fp16)[name = string("op_10732_cast_fp16")]; tensor concat_104 = const()[name = string("concat_104"), val = tensor([13, 0, 0, 0])]; tensor concat_105 = const()[name = string("concat_105"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_27_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_27_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_27_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_104, begin_mask = kv_cache_0_internal_tensor_assign_27_begin_mask_0, end = concat_105, end_mask = kv_cache_0_internal_tensor_assign_27_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_27_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_27_stride_0, update = var_10732_cast_fp16, x = coreml_update_state_55)[name = string("kv_cache_0_internal_tensor_assign_27_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_27_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_56_write_state")]; tensor coreml_update_state_56 = read_state(input = kv_cache_0)[name = string("coreml_update_state_56")]; tensor var_10739_axes_0 = const()[name = string("op_10739_axes_0"), val = tensor([0])]; tensor var_10739_cast_fp16 = squeeze(axes = var_10739_axes_0, x = V_new_27_cast_fp16)[name = string("op_10739_cast_fp16")]; tensor concat_106 = const()[name = string("concat_106"), val = tensor([48, 0, 0, 0])]; tensor concat_107 = const()[name = string("concat_107"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_28_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_28_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_28_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_106, begin_mask = kv_cache_0_internal_tensor_assign_28_begin_mask_0, end = concat_107, end_mask = kv_cache_0_internal_tensor_assign_28_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_28_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_28_stride_0, update = var_10739_cast_fp16, x = coreml_update_state_56)[name = string("kv_cache_0_internal_tensor_assign_28_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_28_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_57_write_state")]; tensor coreml_update_state_57 = read_state(input = kv_cache_0)[name = string("coreml_update_state_57")]; tensor K_for_attn_27_begin_0 = const()[name = string("K_for_attn_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor K_for_attn_27_end_0 = const()[name = string("K_for_attn_27_end_0"), val = tensor([1, 1, 512, 256])]; tensor K_for_attn_27_end_mask_0 = const()[name = string("K_for_attn_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor K_for_attn_27_cast_fp16 = slice_by_index(begin = K_for_attn_27_begin_0, end = K_for_attn_27_end_0, end_mask = K_for_attn_27_end_mask_0, x = K_new_27_cast_fp16)[name = string("K_for_attn_27_cast_fp16")]; tensor V_for_attn_27_begin_0 = const()[name = string("V_for_attn_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor V_for_attn_27_end_0 = const()[name = string("V_for_attn_27_end_0"), val = tensor([1, 1, 512, 256])]; tensor V_for_attn_27_end_mask_0 = const()[name = string("V_for_attn_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor V_for_attn_27_cast_fp16 = slice_by_index(begin = V_for_attn_27_begin_0, end = V_for_attn_27_end_0, end_mask = V_for_attn_27_end_mask_0, x = V_new_27_cast_fp16)[name = string("V_for_attn_27_cast_fp16")]; tensor transpose_52_perm_0 = const()[name = string("transpose_52_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_26_reps_0 = const()[name = string("tile_26_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_52_cast_fp16 = transpose(perm = transpose_52_perm_0, x = K_for_attn_27_cast_fp16)[name = string("transpose_167")]; tensor tile_26_cast_fp16 = tile(reps = tile_26_reps_0, x = transpose_52_cast_fp16)[name = string("tile_26_cast_fp16")]; tensor concat_108 = const()[name = string("concat_108"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_52_cast_fp16 = reshape(shape = concat_108, x = tile_26_cast_fp16)[name = string("reshape_52_cast_fp16")]; tensor transpose_53_perm_0 = const()[name = string("transpose_53_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_109 = const()[name = string("concat_109"), val = tensor([-1, 1, 512, 256])]; tensor transpose_53_cast_fp16 = transpose(perm = transpose_53_perm_0, x = reshape_52_cast_fp16)[name = string("transpose_166")]; tensor reshape_53_cast_fp16 = reshape(shape = concat_109, x = transpose_53_cast_fp16)[name = string("reshape_53_cast_fp16")]; tensor transpose_153_perm_0 = const()[name = string("transpose_153_perm_0"), val = tensor([1, 0, -1, -2])]; tensor transpose_54_perm_0 = const()[name = string("transpose_54_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_27_reps_0 = const()[name = string("tile_27_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_54_cast_fp16 = transpose(perm = transpose_54_perm_0, x = V_for_attn_27_cast_fp16)[name = string("transpose_165")]; tensor tile_27_cast_fp16 = tile(reps = tile_27_reps_0, x = transpose_54_cast_fp16)[name = string("tile_27_cast_fp16")]; tensor concat_110 = const()[name = string("concat_110"), val = tensor([8, 1, 1, 512, 256])]; tensor reshape_54_cast_fp16 = reshape(shape = concat_110, x = tile_27_cast_fp16)[name = string("reshape_54_cast_fp16")]; tensor transpose_55_perm_0 = const()[name = string("transpose_55_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_111 = const()[name = string("concat_111"), val = tensor([-1, 1, 512, 256])]; tensor transpose_55_cast_fp16 = transpose(perm = transpose_55_perm_0, x = reshape_54_cast_fp16)[name = string("transpose_164")]; tensor reshape_55_cast_fp16 = reshape(shape = concat_111, x = transpose_55_cast_fp16)[name = string("reshape_55_cast_fp16")]; tensor V_expanded_27_perm_0 = const()[name = string("V_expanded_27_perm_0"), val = tensor([1, 0, -2, -1])]; bool var_10786_transpose_x_0 = const()[name = string("op_10786_transpose_x_0"), val = bool(false)]; bool var_10786_transpose_y_0 = const()[name = string("op_10786_transpose_y_0"), val = bool(false)]; tensor transpose_153_cast_fp16 = transpose(perm = transpose_153_perm_0, x = reshape_53_cast_fp16)[name = string("transpose_163")]; tensor var_10786_cast_fp16 = matmul(transpose_x = var_10786_transpose_x_0, transpose_y = var_10786_transpose_y_0, x = q_111, y = transpose_153_cast_fp16)[name = string("op_10786_cast_fp16")]; tensor attn_weights_81_cast_fp16 = add(x = var_10786_cast_fp16, y = causal_mask)[name = string("attn_weights_81_cast_fp16")]; int32 var_10796 = const()[name = string("op_10796"), val = int32(-1)]; tensor var_10798_cast_fp16 = softmax(axis = var_10796, x = attn_weights_81_cast_fp16)[name = string("op_10798_cast_fp16")]; bool var_10814_transpose_x_0 = const()[name = string("op_10814_transpose_x_0"), val = bool(false)]; bool var_10814_transpose_y_0 = const()[name = string("op_10814_transpose_y_0"), val = bool(false)]; tensor V_expanded_27_cast_fp16 = transpose(perm = V_expanded_27_perm_0, x = reshape_55_cast_fp16)[name = string("transpose_162")]; tensor var_10814_cast_fp16 = matmul(transpose_x = var_10814_transpose_x_0, transpose_y = var_10814_transpose_y_0, x = var_10798_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_10814_cast_fp16")]; tensor var_10824 = const()[name = string("op_10824"), val = tensor([0, 2, 1, 3])]; tensor var_10831 = const()[name = string("op_10831"), val = tensor([1, 1, -1])]; tensor var_10825 = transpose(perm = var_10824, x = var_10814_cast_fp16)[name = string("transpose_161")]; tensor attn_output_81 = reshape(shape = var_10831, x = var_10825)[name = string("attn_output_81")]; tensor var_10836 = const()[name = string("op_10836"), val = tensor([0, 2, 1])]; tensor squeeze_13_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2274182976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2275755904))))[name = string("squeeze_13_palettized")]; string var_10852_pad_type_0 = const()[name = string("op_10852_pad_type_0"), val = string("valid")]; int32 var_10852_groups_0 = const()[name = string("op_10852_groups_0"), val = int32(1)]; tensor var_10852_strides_0 = const()[name = string("op_10852_strides_0"), val = tensor([1])]; tensor var_10852_pad_0 = const()[name = string("op_10852_pad_0"), val = tensor([0, 0])]; tensor var_10852_dilations_0 = const()[name = string("op_10852_dilations_0"), val = tensor([1])]; tensor var_10837 = transpose(perm = var_10836, x = attn_output_81)[name = string("transpose_160")]; tensor var_10852 = conv(dilations = var_10852_dilations_0, groups = var_10852_groups_0, pad = var_10852_pad_0, pad_type = var_10852_pad_type_0, strides = var_10852_strides_0, weight = squeeze_13_palettized, x = var_10837)[name = string("op_10852")]; tensor var_10856 = const()[name = string("op_10856"), val = tensor([0, 2, 1])]; int32 var_10862 = const()[name = string("op_10862"), val = int32(-1)]; fp16 const_314_promoted_to_fp16 = const()[name = string("const_314_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_543 = transpose(perm = var_10856, x = var_10852)[name = string("transpose_159")]; tensor var_10868_cast_fp16 = mul(x = x_543, y = const_314_promoted_to_fp16)[name = string("op_10868_cast_fp16")]; bool input_469_interleave_0 = const()[name = string("input_469_interleave_0"), val = bool(false)]; tensor input_469_cast_fp16 = concat(axis = var_10862, interleave = input_469_interleave_0, values = (x_543, var_10868_cast_fp16))[name = string("input_469_cast_fp16")]; tensor normed_517_axes_0 = const()[name = string("normed_517_axes_0"), val = tensor([-1])]; fp16 var_10860_to_fp16 = const()[name = string("op_10860_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_517_cast_fp16 = layer_norm(axes = normed_517_axes_0, epsilon = var_10860_to_fp16, x = input_469_cast_fp16)[name = string("normed_517_cast_fp16")]; tensor var_10873_split_sizes_0 = const()[name = string("op_10873_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_10873_axis_0 = const()[name = string("op_10873_axis_0"), val = int32(-1)]; tensor var_10873_cast_fp16_0, tensor var_10873_cast_fp16_1 = split(axis = var_10873_axis_0, split_sizes = var_10873_split_sizes_0, x = normed_517_cast_fp16)[name = string("op_10873_cast_fp16")]; tensor const_315_to_fp16 = const()[name = string("const_315_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2275757504)))]; tensor var_10876_cast_fp16 = mul(x = var_10873_cast_fp16_0, y = const_315_to_fp16)[name = string("op_10876_cast_fp16")]; tensor x_547_cast_fp16 = add(x = x_529_cast_fp16, y = var_10876_cast_fp16)[name = string("x_547_cast_fp16")]; int32 var_10883 = const()[name = string("op_10883"), val = int32(-1)]; fp16 const_316_promoted_to_fp16 = const()[name = string("const_316_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10889_cast_fp16 = mul(x = x_547_cast_fp16, y = const_316_promoted_to_fp16)[name = string("op_10889_cast_fp16")]; bool input_471_interleave_0 = const()[name = string("input_471_interleave_0"), val = bool(false)]; tensor input_471_cast_fp16 = concat(axis = var_10883, interleave = input_471_interleave_0, values = (x_547_cast_fp16, var_10889_cast_fp16))[name = string("input_471_cast_fp16")]; tensor normed_521_axes_0 = const()[name = string("normed_521_axes_0"), val = tensor([-1])]; fp16 var_10881_to_fp16 = const()[name = string("op_10881_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_521_cast_fp16 = layer_norm(axes = normed_521_axes_0, epsilon = var_10881_to_fp16, x = input_471_cast_fp16)[name = string("normed_521_cast_fp16")]; tensor var_10894_split_sizes_0 = const()[name = string("op_10894_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_10894_axis_0 = const()[name = string("op_10894_axis_0"), val = int32(-1)]; tensor var_10894_cast_fp16_0, tensor var_10894_cast_fp16_1 = split(axis = var_10894_axis_0, split_sizes = var_10894_split_sizes_0, x = normed_521_cast_fp16)[name = string("op_10894_cast_fp16")]; tensor const_317_to_fp16 = const()[name = string("const_317_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2275760640)))]; tensor var_10897_cast_fp16 = mul(x = var_10894_cast_fp16_0, y = const_317_to_fp16)[name = string("op_10897_cast_fp16")]; tensor var_10910 = const()[name = string("op_10910"), val = tensor([0, 2, 1])]; tensor input_473_axes_0 = const()[name = string("input_473_axes_0"), val = tensor([2])]; tensor var_10911 = transpose(perm = var_10910, x = var_10897_cast_fp16)[name = string("transpose_158")]; tensor input_473 = expand_dims(axes = input_473_axes_0, x = var_10911)[name = string("input_473")]; string gate_53_pad_type_0 = const()[name = string("gate_53_pad_type_0"), val = string("valid")]; tensor gate_53_strides_0 = const()[name = string("gate_53_strides_0"), val = tensor([1, 1])]; tensor gate_53_pad_0 = const()[name = string("gate_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_53_dilations_0 = const()[name = string("gate_53_dilations_0"), val = tensor([1, 1])]; int32 gate_53_groups_0 = const()[name = string("gate_53_groups_0"), val = int32(1)]; tensor gate_53 = conv(dilations = gate_53_dilations_0, groups = gate_53_groups_0, pad = gate_53_pad_0, pad_type = gate_53_pad_type_0, strides = gate_53_strides_0, weight = layers_13_mlp_gate_proj_weight_palettized, x = input_473)[name = string("gate_53")]; string up_27_pad_type_0 = const()[name = string("up_27_pad_type_0"), val = string("valid")]; tensor up_27_strides_0 = const()[name = string("up_27_strides_0"), val = tensor([1, 1])]; tensor up_27_pad_0 = const()[name = string("up_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_27_dilations_0 = const()[name = string("up_27_dilations_0"), val = tensor([1, 1])]; int32 up_27_groups_0 = const()[name = string("up_27_groups_0"), val = int32(1)]; tensor up_27 = conv(dilations = up_27_dilations_0, groups = up_27_groups_0, pad = up_27_pad_0, pad_type = up_27_pad_type_0, strides = up_27_strides_0, weight = layers_13_mlp_up_proj_weight_palettized, x = input_473)[name = string("up_27")]; string gate_55_mode_0 = const()[name = string("gate_55_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_55 = gelu(mode = gate_55_mode_0, x = gate_53)[name = string("gate_55")]; tensor input_475 = mul(x = gate_55, y = up_27)[name = string("input_475")]; string mlp_out_27_pad_type_0 = const()[name = string("mlp_out_27_pad_type_0"), val = string("valid")]; tensor mlp_out_27_strides_0 = const()[name = string("mlp_out_27_strides_0"), val = tensor([1, 1])]; tensor mlp_out_27_pad_0 = const()[name = string("mlp_out_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_27_dilations_0 = const()[name = string("mlp_out_27_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_27_groups_0 = const()[name = string("mlp_out_27_groups_0"), val = int32(1)]; tensor mlp_out_27 = conv(dilations = mlp_out_27_dilations_0, groups = mlp_out_27_groups_0, pad = mlp_out_27_pad_0, pad_type = mlp_out_27_pad_type_0, strides = mlp_out_27_strides_0, weight = layers_13_mlp_down_proj_weight_palettized, x = input_475)[name = string("mlp_out_27")]; tensor var_10951_axes_0 = const()[name = string("op_10951_axes_0"), val = tensor([2])]; tensor var_10951 = squeeze(axes = var_10951_axes_0, x = mlp_out_27)[name = string("op_10951")]; tensor var_10955 = const()[name = string("op_10955"), val = tensor([0, 2, 1])]; int32 var_10961 = const()[name = string("op_10961"), val = int32(-1)]; fp16 const_318_promoted_to_fp16 = const()[name = string("const_318_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_551 = transpose(perm = var_10955, x = var_10951)[name = string("transpose_157")]; tensor var_10967_cast_fp16 = mul(x = x_551, y = const_318_promoted_to_fp16)[name = string("op_10967_cast_fp16")]; bool input_477_interleave_0 = const()[name = string("input_477_interleave_0"), val = bool(false)]; tensor input_477_cast_fp16 = concat(axis = var_10961, interleave = input_477_interleave_0, values = (x_551, var_10967_cast_fp16))[name = string("input_477_cast_fp16")]; tensor normed_525_axes_0 = const()[name = string("normed_525_axes_0"), val = tensor([-1])]; fp16 var_10959_to_fp16 = const()[name = string("op_10959_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_525_cast_fp16 = layer_norm(axes = normed_525_axes_0, epsilon = var_10959_to_fp16, x = input_477_cast_fp16)[name = string("normed_525_cast_fp16")]; tensor var_10972_split_sizes_0 = const()[name = string("op_10972_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_10972_axis_0 = const()[name = string("op_10972_axis_0"), val = int32(-1)]; tensor var_10972_cast_fp16_0, tensor var_10972_cast_fp16_1 = split(axis = var_10972_axis_0, split_sizes = var_10972_split_sizes_0, x = normed_525_cast_fp16)[name = string("op_10972_cast_fp16")]; tensor const_319_to_fp16 = const()[name = string("const_319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2275763776)))]; tensor var_10975_cast_fp16 = mul(x = var_10972_cast_fp16_0, y = const_319_to_fp16)[name = string("op_10975_cast_fp16")]; tensor hidden_states_169_cast_fp16 = add(x = x_547_cast_fp16, y = var_10975_cast_fp16)[name = string("hidden_states_169_cast_fp16")]; tensor per_layer_slice_27_begin_0 = const()[name = string("per_layer_slice_27_begin_0"), val = tensor([0, 0, 3328])]; tensor per_layer_slice_27_end_0 = const()[name = string("per_layer_slice_27_end_0"), val = tensor([1, 1, 3584])]; tensor per_layer_slice_27_end_mask_0 = const()[name = string("per_layer_slice_27_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_27 = slice_by_index(begin = per_layer_slice_27_begin_0, end = per_layer_slice_27_end_0, end_mask = per_layer_slice_27_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_27")]; tensor gated_53 = linear(bias = linear_1_bias_0, weight = layers_13_per_layer_input_gate_weight_palettized, x = hidden_states_169_cast_fp16)[name = string("linear_27")]; string gated_55_mode_0 = const()[name = string("gated_55_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_55 = gelu(mode = gated_55_mode_0, x = gated_53)[name = string("gated_55")]; tensor input_481 = mul(x = gated_55, y = per_layer_slice_27)[name = string("input_481")]; tensor x_555 = linear(bias = linear_2_bias_0, weight = layers_13_per_layer_projection_weight_palettized, x = input_481)[name = string("linear_28")]; int32 var_11012 = const()[name = string("op_11012"), val = int32(-1)]; fp16 const_320_promoted_to_fp16 = const()[name = string("const_320_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_11018_cast_fp16 = mul(x = x_555, y = const_320_promoted_to_fp16)[name = string("op_11018_cast_fp16")]; bool input_483_interleave_0 = const()[name = string("input_483_interleave_0"), val = bool(false)]; tensor input_483_cast_fp16 = concat(axis = var_11012, interleave = input_483_interleave_0, values = (x_555, var_11018_cast_fp16))[name = string("input_483_cast_fp16")]; tensor normed_529_axes_0 = const()[name = string("normed_529_axes_0"), val = tensor([-1])]; fp16 var_11010_to_fp16 = const()[name = string("op_11010_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_529_cast_fp16 = layer_norm(axes = normed_529_axes_0, epsilon = var_11010_to_fp16, x = input_483_cast_fp16)[name = string("normed_529_cast_fp16")]; tensor var_11023_split_sizes_0 = const()[name = string("op_11023_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_11023_axis_0 = const()[name = string("op_11023_axis_0"), val = int32(-1)]; tensor var_11023_cast_fp16_0, tensor var_11023_cast_fp16_1 = split(axis = var_11023_axis_0, split_sizes = var_11023_split_sizes_0, x = normed_529_cast_fp16)[name = string("op_11023_cast_fp16")]; tensor const_321_to_fp16 = const()[name = string("const_321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2275766912)))]; tensor var_11026_cast_fp16 = mul(x = var_11023_cast_fp16_0, y = const_321_to_fp16)[name = string("op_11026_cast_fp16")]; tensor hidden_states_173_cast_fp16 = add(x = hidden_states_169_cast_fp16, y = var_11026_cast_fp16)[name = string("hidden_states_173_cast_fp16")]; tensor layers_13_layer_scalar_to_fp16 = const()[name = string("layers_13_layer_scalar_to_fp16"), val = tensor([0x1.6ap-4])]; tensor x_559_cast_fp16 = mul(x = hidden_states_173_cast_fp16, y = layers_13_layer_scalar_to_fp16)[name = string("x_559_cast_fp16")]; int32 var_11034 = const()[name = string("op_11034"), val = int32(-1)]; fp16 const_322_promoted_to_fp16 = const()[name = string("const_322_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_11040_cast_fp16 = mul(x = x_559_cast_fp16, y = const_322_promoted_to_fp16)[name = string("op_11040_cast_fp16")]; bool input_485_interleave_0 = const()[name = string("input_485_interleave_0"), val = bool(false)]; tensor input_485_cast_fp16 = concat(axis = var_11034, interleave = input_485_interleave_0, values = (x_559_cast_fp16, var_11040_cast_fp16))[name = string("input_485_cast_fp16")]; tensor normed_533_axes_0 = const()[name = string("normed_533_axes_0"), val = tensor([-1])]; fp16 var_11032_to_fp16 = const()[name = string("op_11032_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_533_cast_fp16 = layer_norm(axes = normed_533_axes_0, epsilon = var_11032_to_fp16, x = input_485_cast_fp16)[name = string("normed_533_cast_fp16")]; tensor var_11045_split_sizes_0 = const()[name = string("op_11045_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_11045_axis_0 = const()[name = string("op_11045_axis_0"), val = int32(-1)]; tensor var_11045_cast_fp16_0, tensor var_11045_cast_fp16_1 = split(axis = var_11045_axis_0, split_sizes = var_11045_split_sizes_0, x = normed_533_cast_fp16)[name = string("op_11045_cast_fp16")]; tensor const_323_to_fp16 = const()[name = string("const_323_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2275770048)))]; tensor var_11048_cast_fp16 = mul(x = var_11045_cast_fp16_0, y = const_323_to_fp16)[name = string("op_11048_cast_fp16")]; tensor var_11056 = const()[name = string("op_11056"), val = tensor([0, 2, 1])]; tensor var_11059_axes_0 = const()[name = string("op_11059_axes_0"), val = tensor([2])]; tensor var_11057_cast_fp16 = transpose(perm = var_11056, x = var_11048_cast_fp16)[name = string("transpose_156")]; tensor var_11059_cast_fp16 = expand_dims(axes = var_11059_axes_0, x = var_11057_cast_fp16)[name = string("op_11059_cast_fp16")]; string var_11075_pad_type_0 = const()[name = string("op_11075_pad_type_0"), val = string("valid")]; tensor var_11075_strides_0 = const()[name = string("op_11075_strides_0"), val = tensor([1, 1])]; tensor var_11075_pad_0 = const()[name = string("op_11075_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11075_dilations_0 = const()[name = string("op_11075_dilations_0"), val = tensor([1, 1])]; int32 var_11075_groups_0 = const()[name = string("op_11075_groups_0"), val = int32(1)]; tensor var_11075 = conv(dilations = var_11075_dilations_0, groups = var_11075_groups_0, pad = var_11075_pad_0, pad_type = var_11075_pad_type_0, strides = var_11075_strides_0, weight = layers_14_self_attn_q_proj_weight_palettized, x = var_11059_cast_fp16)[name = string("op_11075")]; tensor var_11080 = const()[name = string("op_11080"), val = tensor([1, 8, 512, 1])]; tensor var_11081 = reshape(shape = var_11080, x = var_11075)[name = string("op_11081")]; tensor var_11086 = const()[name = string("op_11086"), val = tensor([0, 1, 3, 2])]; tensor var_11096 = const()[name = string("op_11096"), val = tensor([1, 8, 512])]; tensor var_11087 = transpose(perm = var_11086, x = var_11081)[name = string("transpose_155")]; tensor x_563 = reshape(shape = var_11096, x = var_11087)[name = string("x_563")]; int32 var_11102 = const()[name = string("op_11102"), val = int32(-1)]; fp16 const_324_promoted_to_fp16 = const()[name = string("const_324_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_11108_cast_fp16 = mul(x = x_563, y = const_324_promoted_to_fp16)[name = string("op_11108_cast_fp16")]; bool input_489_interleave_0 = const()[name = string("input_489_interleave_0"), val = bool(false)]; tensor input_489_cast_fp16 = concat(axis = var_11102, interleave = input_489_interleave_0, values = (x_563, var_11108_cast_fp16))[name = string("input_489_cast_fp16")]; tensor normed_537_axes_0 = const()[name = string("normed_537_axes_0"), val = tensor([-1])]; fp16 var_11100_to_fp16 = const()[name = string("op_11100_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_537_cast_fp16 = layer_norm(axes = normed_537_axes_0, epsilon = var_11100_to_fp16, x = input_489_cast_fp16)[name = string("normed_537_cast_fp16")]; tensor var_11113_split_sizes_0 = const()[name = string("op_11113_split_sizes_0"), val = tensor([512, 512])]; int32 var_11113_axis_0 = const()[name = string("op_11113_axis_0"), val = int32(-1)]; tensor var_11113_cast_fp16_0, tensor var_11113_cast_fp16_1 = split(axis = var_11113_axis_0, split_sizes = var_11113_split_sizes_0, x = normed_537_cast_fp16)[name = string("op_11113_cast_fp16")]; tensor const_325_to_fp16 = const()[name = string("const_325_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2275773184)))]; tensor var_11116_cast_fp16 = mul(x = var_11113_cast_fp16_0, y = const_325_to_fp16)[name = string("op_11116_cast_fp16")]; tensor var_11122 = const()[name = string("op_11122"), val = tensor([1, 8, 1, 512])]; tensor q_115 = reshape(shape = var_11122, x = var_11116_cast_fp16)[name = string("q_115")]; tensor var_11124 = mul(x = q_115, y = cos)[name = string("op_11124")]; tensor var_11125_split_sizes_0 = const()[name = string("op_11125_split_sizes_0"), val = tensor([256, 256])]; int32 var_11125_axis_0 = const()[name = string("op_11125_axis_0"), val = int32(-1)]; tensor var_11125_0, tensor var_11125_1 = split(axis = var_11125_axis_0, split_sizes = var_11125_split_sizes_0, x = q_115)[name = string("op_11125")]; fp16 const_326_promoted = const()[name = string("const_326_promoted"), val = fp16(-0x1p+0)]; tensor var_11127 = mul(x = var_11125_1, y = const_326_promoted)[name = string("op_11127")]; int32 var_11129 = const()[name = string("op_11129"), val = int32(-1)]; bool var_11130_interleave_0 = const()[name = string("op_11130_interleave_0"), val = bool(false)]; tensor var_11130 = concat(axis = var_11129, interleave = var_11130_interleave_0, values = (var_11127, var_11125_0))[name = string("op_11130")]; tensor var_11131 = mul(x = var_11130, y = sin)[name = string("op_11131")]; tensor q_119 = add(x = var_11124, y = var_11131)[name = string("q_119")]; string var_11144_pad_type_0 = const()[name = string("op_11144_pad_type_0"), val = string("valid")]; tensor var_11144_strides_0 = const()[name = string("op_11144_strides_0"), val = tensor([1, 1])]; tensor var_11144_pad_0 = const()[name = string("op_11144_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11144_dilations_0 = const()[name = string("op_11144_dilations_0"), val = tensor([1, 1])]; int32 var_11144_groups_0 = const()[name = string("op_11144_groups_0"), val = int32(1)]; tensor var_11144 = conv(dilations = var_11144_dilations_0, groups = var_11144_groups_0, pad = var_11144_pad_0, pad_type = var_11144_pad_type_0, strides = var_11144_strides_0, weight = layers_14_self_attn_k_proj_weight_palettized, x = var_11059_cast_fp16)[name = string("op_11144")]; tensor var_11149 = const()[name = string("op_11149"), val = tensor([1, 1, 512, 1])]; tensor var_11150 = reshape(shape = var_11149, x = var_11144)[name = string("op_11150")]; tensor var_11155 = const()[name = string("op_11155"), val = tensor([0, 1, 3, 2])]; string var_11172_pad_type_0 = const()[name = string("op_11172_pad_type_0"), val = string("valid")]; tensor var_11172_strides_0 = const()[name = string("op_11172_strides_0"), val = tensor([1, 1])]; tensor var_11172_pad_0 = const()[name = string("op_11172_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11172_dilations_0 = const()[name = string("op_11172_dilations_0"), val = tensor([1, 1])]; int32 var_11172_groups_0 = const()[name = string("op_11172_groups_0"), val = int32(1)]; tensor var_11172 = conv(dilations = var_11172_dilations_0, groups = var_11172_groups_0, pad = var_11172_pad_0, pad_type = var_11172_pad_type_0, strides = var_11172_strides_0, weight = layers_14_self_attn_v_proj_weight_palettized, x = var_11059_cast_fp16)[name = string("op_11172")]; tensor var_11177 = const()[name = string("op_11177"), val = tensor([1, 1, 512, 1])]; tensor var_11178 = reshape(shape = var_11177, x = var_11172)[name = string("op_11178")]; tensor var_11183 = const()[name = string("op_11183"), val = tensor([0, 1, 3, 2])]; tensor var_11193 = const()[name = string("op_11193"), val = tensor([1, 1, 512])]; tensor var_11156 = transpose(perm = var_11155, x = var_11150)[name = string("transpose_154")]; tensor x_567 = reshape(shape = var_11193, x = var_11156)[name = string("x_567")]; int32 var_11199 = const()[name = string("op_11199"), val = int32(-1)]; fp16 const_327_promoted_to_fp16 = const()[name = string("const_327_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_11205_cast_fp16 = mul(x = x_567, y = const_327_promoted_to_fp16)[name = string("op_11205_cast_fp16")]; bool input_491_interleave_0 = const()[name = string("input_491_interleave_0"), val = bool(false)]; tensor input_491_cast_fp16 = concat(axis = var_11199, interleave = input_491_interleave_0, values = (x_567, var_11205_cast_fp16))[name = string("input_491_cast_fp16")]; tensor normed_541_axes_0 = const()[name = string("normed_541_axes_0"), val = tensor([-1])]; fp16 var_11197_to_fp16 = const()[name = string("op_11197_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_541_cast_fp16 = layer_norm(axes = normed_541_axes_0, epsilon = var_11197_to_fp16, x = input_491_cast_fp16)[name = string("normed_541_cast_fp16")]; tensor var_11210_split_sizes_0 = const()[name = string("op_11210_split_sizes_0"), val = tensor([512, 512])]; int32 var_11210_axis_0 = const()[name = string("op_11210_axis_0"), val = int32(-1)]; tensor var_11210_cast_fp16_0, tensor var_11210_cast_fp16_1 = split(axis = var_11210_axis_0, split_sizes = var_11210_split_sizes_0, x = normed_541_cast_fp16)[name = string("op_11210_cast_fp16")]; tensor const_328_to_fp16 = const()[name = string("const_328_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2275774272)))]; tensor var_11213_cast_fp16 = mul(x = var_11210_cast_fp16_0, y = const_328_to_fp16)[name = string("op_11213_cast_fp16")]; tensor var_11219 = const()[name = string("op_11219"), val = tensor([1, 1, 1, 512])]; tensor q_117 = reshape(shape = var_11219, x = var_11213_cast_fp16)[name = string("q_117")]; fp16 var_11226_promoted_to_fp16 = const()[name = string("op_11226_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_11184 = transpose(perm = var_11183, x = var_11178)[name = string("transpose_153")]; tensor var_11227_cast_fp16 = pow(x = var_11184, y = var_11226_promoted_to_fp16)[name = string("op_11227_cast_fp16")]; tensor var_11232_axes_0 = const()[name = string("op_11232_axes_0"), val = tensor([-1])]; bool var_11232_keep_dims_0 = const()[name = string("op_11232_keep_dims_0"), val = bool(true)]; tensor var_11232_cast_fp16 = reduce_mean(axes = var_11232_axes_0, keep_dims = var_11232_keep_dims_0, x = var_11227_cast_fp16)[name = string("op_11232_cast_fp16")]; fp16 var_11234_to_fp16 = const()[name = string("op_11234_to_fp16"), val = fp16(0x1.1p-20)]; tensor mean_sq_cast_fp16 = add(x = var_11232_cast_fp16, y = var_11234_to_fp16)[name = string("mean_sq_cast_fp16")]; fp16 var_11241_to_fp16 = const()[name = string("op_11241_to_fp16"), val = fp16(-0x1p-1)]; tensor var_11242_cast_fp16 = pow(x = mean_sq_cast_fp16, y = var_11241_to_fp16)[name = string("op_11242_cast_fp16")]; tensor var_11243_cast_fp16 = mul(x = var_11184, y = var_11242_cast_fp16)[name = string("op_11243_cast_fp16")]; tensor var_11249 = mul(x = q_117, y = cos)[name = string("op_11249")]; tensor var_11250_split_sizes_0 = const()[name = string("op_11250_split_sizes_0"), val = tensor([256, 256])]; int32 var_11250_axis_0 = const()[name = string("op_11250_axis_0"), val = int32(-1)]; tensor var_11250_0, tensor var_11250_1 = split(axis = var_11250_axis_0, split_sizes = var_11250_split_sizes_0, x = q_117)[name = string("op_11250")]; fp16 const_329_promoted = const()[name = string("const_329_promoted"), val = fp16(-0x1p+0)]; tensor var_11252 = mul(x = var_11250_1, y = const_329_promoted)[name = string("op_11252")]; int32 var_11254 = const()[name = string("op_11254"), val = int32(-1)]; bool var_11255_interleave_0 = const()[name = string("op_11255_interleave_0"), val = bool(false)]; tensor var_11255 = concat(axis = var_11254, interleave = var_11255_interleave_0, values = (var_11252, var_11250_0))[name = string("op_11255")]; tensor var_11256 = mul(x = var_11255, y = sin)[name = string("op_11256")]; tensor k = add(x = var_11249, y = var_11256)[name = string("k")]; tensor var_11261_begin_0 = const()[name = string("op_11261_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_11261_end_0 = const()[name = string("op_11261_end_0"), val = tensor([15, 1, 512, 512])]; tensor var_11261_end_mask_0 = const()[name = string("op_11261_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_11261_squeeze_mask_0 = const()[name = string("op_11261_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_11261_cast_fp16 = slice_by_index(begin = var_11261_begin_0, end = var_11261_end_0, end_mask = var_11261_end_mask_0, squeeze_mask = var_11261_squeeze_mask_0, x = coreml_update_state_57)[name = string("op_11261_cast_fp16")]; tensor K_cache_axes_0 = const()[name = string("K_cache_axes_0"), val = tensor([0])]; tensor K_cache_cast_fp16 = expand_dims(axes = K_cache_axes_0, x = var_11261_cast_fp16)[name = string("K_cache_cast_fp16")]; tensor var_11266_begin_0 = const()[name = string("op_11266_begin_0"), val = tensor([49, 0, 0, 0])]; tensor var_11266_end_0 = const()[name = string("op_11266_end_0"), val = tensor([50, 1, 512, 512])]; tensor var_11266_end_mask_0 = const()[name = string("op_11266_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_11266_squeeze_mask_0 = const()[name = string("op_11266_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor var_11266_cast_fp16 = slice_by_index(begin = var_11266_begin_0, end = var_11266_end_0, end_mask = var_11266_end_mask_0, squeeze_mask = var_11266_squeeze_mask_0, x = coreml_update_state_57)[name = string("op_11266_cast_fp16")]; tensor V_cache_axes_0 = const()[name = string("V_cache_axes_0"), val = tensor([0])]; tensor V_cache_cast_fp16 = expand_dims(axes = V_cache_axes_0, x = var_11266_cast_fp16)[name = string("V_cache_cast_fp16")]; tensor var_11272_cast_fp16 = mul(x = K_cache_cast_fp16, y = var_3515_cast_fp16)[name = string("op_11272_cast_fp16")]; tensor var_11273_reps_0 = const()[name = string("op_11273_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_11273 = tile(reps = var_11273_reps_0, x = k)[name = string("op_11273")]; tensor var_11274_cast_fp16 = mul(x = var_11273, y = update_mask)[name = string("op_11274_cast_fp16")]; tensor K_new_cast_fp16 = add(x = var_11272_cast_fp16, y = var_11274_cast_fp16)[name = string("K_new_cast_fp16")]; tensor var_11280_cast_fp16 = mul(x = V_cache_cast_fp16, y = var_3515_cast_fp16)[name = string("op_11280_cast_fp16")]; tensor var_11281_reps_0 = const()[name = string("op_11281_reps_0"), val = tensor([1, 1, 512, 1])]; tensor var_11281 = tile(reps = var_11281_reps_0, x = var_11243_cast_fp16)[name = string("op_11281")]; tensor var_11282_cast_fp16 = mul(x = var_11281, y = update_mask)[name = string("op_11282_cast_fp16")]; tensor V_new_cast_fp16 = add(x = var_11280_cast_fp16, y = var_11282_cast_fp16)[name = string("V_new_cast_fp16")]; tensor var_11286_axes_0 = const()[name = string("op_11286_axes_0"), val = tensor([0])]; tensor var_11286_cast_fp16 = squeeze(axes = var_11286_axes_0, x = K_new_cast_fp16)[name = string("op_11286_cast_fp16")]; tensor concat_112 = const()[name = string("concat_112"), val = tensor([14, 0, 0, 0])]; tensor concat_113 = const()[name = string("concat_113"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_29_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_29_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_29_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_112, begin_mask = kv_cache_0_internal_tensor_assign_29_begin_mask_0, end = concat_113, end_mask = kv_cache_0_internal_tensor_assign_29_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_29_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_29_stride_0, update = var_11286_cast_fp16, x = coreml_update_state_57)[name = string("kv_cache_0_internal_tensor_assign_29_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_29_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_58_write_state")]; tensor coreml_update_state_58 = read_state(input = kv_cache_0)[name = string("coreml_update_state_58")]; tensor var_11293_axes_0 = const()[name = string("op_11293_axes_0"), val = tensor([0])]; tensor var_11293_cast_fp16 = squeeze(axes = var_11293_axes_0, x = V_new_cast_fp16)[name = string("op_11293_cast_fp16")]; tensor concat_114 = const()[name = string("concat_114"), val = tensor([49, 0, 0, 0])]; tensor concat_115 = const()[name = string("concat_115"), val = tensor([0, 0, 0, 0])]; tensor kv_cache_0_internal_tensor_assign_30_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; tensor kv_cache_0_internal_tensor_assign_30_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_30_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, true, true])]; tensor kv_cache_0_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor kv_cache_0_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_114, begin_mask = kv_cache_0_internal_tensor_assign_30_begin_mask_0, end = concat_115, end_mask = kv_cache_0_internal_tensor_assign_30_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_30_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_30_stride_0, update = var_11293_cast_fp16, x = coreml_update_state_58)[name = string("kv_cache_0_internal_tensor_assign_30_cast_fp16")]; write_state(data = kv_cache_0_internal_tensor_assign_30_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_59_write_state")]; tensor transpose_56_perm_0 = const()[name = string("transpose_56_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_28_reps_0 = const()[name = string("tile_28_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_56_cast_fp16 = transpose(perm = transpose_56_perm_0, x = K_new_cast_fp16)[name = string("transpose_152")]; tensor tile_28_cast_fp16 = tile(reps = tile_28_reps_0, x = transpose_56_cast_fp16)[name = string("tile_28_cast_fp16")]; tensor concat_116 = const()[name = string("concat_116"), val = tensor([8, 1, 1, 512, 512])]; tensor reshape_56_cast_fp16 = reshape(shape = concat_116, x = tile_28_cast_fp16)[name = string("reshape_56_cast_fp16")]; tensor transpose_57_perm_0 = const()[name = string("transpose_57_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_117 = const()[name = string("concat_117"), val = tensor([-1, 1, 512, 512])]; tensor transpose_57_cast_fp16 = transpose(perm = transpose_57_perm_0, x = reshape_56_cast_fp16)[name = string("transpose_151")]; tensor reshape_57_cast_fp16 = reshape(shape = concat_117, x = transpose_57_cast_fp16)[name = string("reshape_57_cast_fp16")]; tensor transpose_154_perm_0 = const()[name = string("transpose_154_perm_0"), val = tensor([1, 0, -1, -2])]; tensor transpose_58_perm_0 = const()[name = string("transpose_58_perm_0"), val = tensor([1, 0, 2, 3])]; tensor tile_29_reps_0 = const()[name = string("tile_29_reps_0"), val = tensor([8, 1, 1, 1])]; tensor transpose_58_cast_fp16 = transpose(perm = transpose_58_perm_0, x = V_new_cast_fp16)[name = string("transpose_150")]; tensor tile_29_cast_fp16 = tile(reps = tile_29_reps_0, x = transpose_58_cast_fp16)[name = string("tile_29_cast_fp16")]; tensor concat_118 = const()[name = string("concat_118"), val = tensor([8, 1, 1, 512, 512])]; tensor reshape_58_cast_fp16 = reshape(shape = concat_118, x = tile_29_cast_fp16)[name = string("reshape_58_cast_fp16")]; tensor transpose_59_perm_0 = const()[name = string("transpose_59_perm_0"), val = tensor([1, 0, 2, 3, 4])]; tensor concat_119 = const()[name = string("concat_119"), val = tensor([-1, 1, 512, 512])]; tensor transpose_59_cast_fp16 = transpose(perm = transpose_59_perm_0, x = reshape_58_cast_fp16)[name = string("transpose_149")]; tensor reshape_59_cast_fp16 = reshape(shape = concat_119, x = transpose_59_cast_fp16)[name = string("reshape_59_cast_fp16")]; tensor V_expanded_29_perm_0 = const()[name = string("V_expanded_29_perm_0"), val = tensor([1, 0, -2, -1])]; bool var_11340_transpose_x_0 = const()[name = string("op_11340_transpose_x_0"), val = bool(false)]; bool var_11340_transpose_y_0 = const()[name = string("op_11340_transpose_y_0"), val = bool(false)]; tensor transpose_154_cast_fp16 = transpose(perm = transpose_154_perm_0, x = reshape_57_cast_fp16)[name = string("transpose_148")]; tensor var_11340_cast_fp16 = matmul(transpose_x = var_11340_transpose_x_0, transpose_y = var_11340_transpose_y_0, x = q_119, y = transpose_154_cast_fp16)[name = string("op_11340_cast_fp16")]; tensor attn_weights_87_cast_fp16 = add(x = var_11340_cast_fp16, y = causal_mask)[name = string("attn_weights_87_cast_fp16")]; int32 var_11350 = const()[name = string("op_11350"), val = int32(-1)]; tensor var_11352_cast_fp16 = softmax(axis = var_11350, x = attn_weights_87_cast_fp16)[name = string("op_11352_cast_fp16")]; bool var_11368_transpose_x_0 = const()[name = string("op_11368_transpose_x_0"), val = bool(false)]; bool var_11368_transpose_y_0 = const()[name = string("op_11368_transpose_y_0"), val = bool(false)]; tensor V_expanded_29_cast_fp16 = transpose(perm = V_expanded_29_perm_0, x = reshape_59_cast_fp16)[name = string("transpose_147")]; tensor var_11368_cast_fp16 = matmul(transpose_x = var_11368_transpose_x_0, transpose_y = var_11368_transpose_y_0, x = var_11352_cast_fp16, y = V_expanded_29_cast_fp16)[name = string("op_11368_cast_fp16")]; tensor var_11378 = const()[name = string("op_11378"), val = tensor([0, 2, 1, 3])]; tensor var_11385 = const()[name = string("op_11385"), val = tensor([1, 1, -1])]; tensor var_11379 = transpose(perm = var_11378, x = var_11368_cast_fp16)[name = string("transpose_146")]; tensor attn_output_87 = reshape(shape = var_11385, x = var_11379)[name = string("attn_output_87")]; tensor var_11390 = const()[name = string("op_11390"), val = tensor([0, 2, 1])]; tensor squeeze_14_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2275775360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2278921152))))[name = string("squeeze_14_palettized")]; string var_11406_pad_type_0 = const()[name = string("op_11406_pad_type_0"), val = string("valid")]; int32 var_11406_groups_0 = const()[name = string("op_11406_groups_0"), val = int32(1)]; tensor var_11406_strides_0 = const()[name = string("op_11406_strides_0"), val = tensor([1])]; tensor var_11406_pad_0 = const()[name = string("op_11406_pad_0"), val = tensor([0, 0])]; tensor var_11406_dilations_0 = const()[name = string("op_11406_dilations_0"), val = tensor([1])]; tensor var_11391 = transpose(perm = var_11390, x = attn_output_87)[name = string("transpose_145")]; tensor var_11406 = conv(dilations = var_11406_dilations_0, groups = var_11406_groups_0, pad = var_11406_pad_0, pad_type = var_11406_pad_type_0, strides = var_11406_strides_0, weight = squeeze_14_palettized, x = var_11391)[name = string("op_11406")]; tensor var_11410 = const()[name = string("op_11410"), val = tensor([0, 2, 1])]; int32 var_11416 = const()[name = string("op_11416"), val = int32(-1)]; fp16 const_330_promoted_to_fp16 = const()[name = string("const_330_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_573 = transpose(perm = var_11410, x = var_11406)[name = string("transpose_144")]; tensor var_11422_cast_fp16 = mul(x = x_573, y = const_330_promoted_to_fp16)[name = string("op_11422_cast_fp16")]; bool input_495_interleave_0 = const()[name = string("input_495_interleave_0"), val = bool(false)]; tensor input_495_cast_fp16 = concat(axis = var_11416, interleave = input_495_interleave_0, values = (x_573, var_11422_cast_fp16))[name = string("input_495_cast_fp16")]; tensor normed_545_axes_0 = const()[name = string("normed_545_axes_0"), val = tensor([-1])]; fp16 var_11414_to_fp16 = const()[name = string("op_11414_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_545_cast_fp16 = layer_norm(axes = normed_545_axes_0, epsilon = var_11414_to_fp16, x = input_495_cast_fp16)[name = string("normed_545_cast_fp16")]; tensor var_11427_split_sizes_0 = const()[name = string("op_11427_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_11427_axis_0 = const()[name = string("op_11427_axis_0"), val = int32(-1)]; tensor var_11427_cast_fp16_0, tensor var_11427_cast_fp16_1 = split(axis = var_11427_axis_0, split_sizes = var_11427_split_sizes_0, x = normed_545_cast_fp16)[name = string("op_11427_cast_fp16")]; tensor const_331_to_fp16 = const()[name = string("const_331_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2278922752)))]; tensor var_11430_cast_fp16 = mul(x = var_11427_cast_fp16_0, y = const_331_to_fp16)[name = string("op_11430_cast_fp16")]; tensor x_577_cast_fp16 = add(x = x_559_cast_fp16, y = var_11430_cast_fp16)[name = string("x_577_cast_fp16")]; int32 var_11437 = const()[name = string("op_11437"), val = int32(-1)]; fp16 const_332_promoted_to_fp16 = const()[name = string("const_332_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_11443_cast_fp16 = mul(x = x_577_cast_fp16, y = const_332_promoted_to_fp16)[name = string("op_11443_cast_fp16")]; bool input_497_interleave_0 = const()[name = string("input_497_interleave_0"), val = bool(false)]; tensor input_497_cast_fp16 = concat(axis = var_11437, interleave = input_497_interleave_0, values = (x_577_cast_fp16, var_11443_cast_fp16))[name = string("input_497_cast_fp16")]; tensor normed_549_axes_0 = const()[name = string("normed_549_axes_0"), val = tensor([-1])]; fp16 var_11435_to_fp16 = const()[name = string("op_11435_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_549_cast_fp16 = layer_norm(axes = normed_549_axes_0, epsilon = var_11435_to_fp16, x = input_497_cast_fp16)[name = string("normed_549_cast_fp16")]; tensor var_11448_split_sizes_0 = const()[name = string("op_11448_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_11448_axis_0 = const()[name = string("op_11448_axis_0"), val = int32(-1)]; tensor var_11448_cast_fp16_0, tensor var_11448_cast_fp16_1 = split(axis = var_11448_axis_0, split_sizes = var_11448_split_sizes_0, x = normed_549_cast_fp16)[name = string("op_11448_cast_fp16")]; tensor const_333_to_fp16 = const()[name = string("const_333_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2278925888)))]; tensor var_11451_cast_fp16 = mul(x = var_11448_cast_fp16_0, y = const_333_to_fp16)[name = string("op_11451_cast_fp16")]; tensor var_11464 = const()[name = string("op_11464"), val = tensor([0, 2, 1])]; tensor input_499_axes_0 = const()[name = string("input_499_axes_0"), val = tensor([2])]; tensor var_11465 = transpose(perm = var_11464, x = var_11451_cast_fp16)[name = string("transpose_143")]; tensor input_499 = expand_dims(axes = input_499_axes_0, x = var_11465)[name = string("input_499")]; string gate_57_pad_type_0 = const()[name = string("gate_57_pad_type_0"), val = string("valid")]; tensor gate_57_strides_0 = const()[name = string("gate_57_strides_0"), val = tensor([1, 1])]; tensor gate_57_pad_0 = const()[name = string("gate_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_57_dilations_0 = const()[name = string("gate_57_dilations_0"), val = tensor([1, 1])]; int32 gate_57_groups_0 = const()[name = string("gate_57_groups_0"), val = int32(1)]; tensor gate_57 = conv(dilations = gate_57_dilations_0, groups = gate_57_groups_0, pad = gate_57_pad_0, pad_type = gate_57_pad_type_0, strides = gate_57_strides_0, weight = layers_14_mlp_gate_proj_weight_palettized, x = input_499)[name = string("gate_57")]; string up_29_pad_type_0 = const()[name = string("up_29_pad_type_0"), val = string("valid")]; tensor up_29_strides_0 = const()[name = string("up_29_strides_0"), val = tensor([1, 1])]; tensor up_29_pad_0 = const()[name = string("up_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_29_dilations_0 = const()[name = string("up_29_dilations_0"), val = tensor([1, 1])]; int32 up_29_groups_0 = const()[name = string("up_29_groups_0"), val = int32(1)]; tensor up_29 = conv(dilations = up_29_dilations_0, groups = up_29_groups_0, pad = up_29_pad_0, pad_type = up_29_pad_type_0, strides = up_29_strides_0, weight = layers_14_mlp_up_proj_weight_palettized, x = input_499)[name = string("up_29")]; string gate_59_mode_0 = const()[name = string("gate_59_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_59 = gelu(mode = gate_59_mode_0, x = gate_57)[name = string("gate_59")]; tensor input_501 = mul(x = gate_59, y = up_29)[name = string("input_501")]; string mlp_out_29_pad_type_0 = const()[name = string("mlp_out_29_pad_type_0"), val = string("valid")]; tensor mlp_out_29_strides_0 = const()[name = string("mlp_out_29_strides_0"), val = tensor([1, 1])]; tensor mlp_out_29_pad_0 = const()[name = string("mlp_out_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_29_dilations_0 = const()[name = string("mlp_out_29_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_29_groups_0 = const()[name = string("mlp_out_29_groups_0"), val = int32(1)]; tensor mlp_out_29 = conv(dilations = mlp_out_29_dilations_0, groups = mlp_out_29_groups_0, pad = mlp_out_29_pad_0, pad_type = mlp_out_29_pad_type_0, strides = mlp_out_29_strides_0, weight = layers_14_mlp_down_proj_weight_palettized, x = input_501)[name = string("mlp_out_29")]; tensor var_11505_axes_0 = const()[name = string("op_11505_axes_0"), val = tensor([2])]; tensor var_11505 = squeeze(axes = var_11505_axes_0, x = mlp_out_29)[name = string("op_11505")]; tensor var_11509 = const()[name = string("op_11509"), val = tensor([0, 2, 1])]; int32 var_11515 = const()[name = string("op_11515"), val = int32(-1)]; fp16 const_334_promoted_to_fp16 = const()[name = string("const_334_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_581 = transpose(perm = var_11509, x = var_11505)[name = string("transpose_142")]; tensor var_11521_cast_fp16 = mul(x = x_581, y = const_334_promoted_to_fp16)[name = string("op_11521_cast_fp16")]; bool input_503_interleave_0 = const()[name = string("input_503_interleave_0"), val = bool(false)]; tensor input_503_cast_fp16 = concat(axis = var_11515, interleave = input_503_interleave_0, values = (x_581, var_11521_cast_fp16))[name = string("input_503_cast_fp16")]; tensor normed_553_axes_0 = const()[name = string("normed_553_axes_0"), val = tensor([-1])]; fp16 var_11513_to_fp16 = const()[name = string("op_11513_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_553_cast_fp16 = layer_norm(axes = normed_553_axes_0, epsilon = var_11513_to_fp16, x = input_503_cast_fp16)[name = string("normed_553_cast_fp16")]; tensor var_11526_split_sizes_0 = const()[name = string("op_11526_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_11526_axis_0 = const()[name = string("op_11526_axis_0"), val = int32(-1)]; tensor var_11526_cast_fp16_0, tensor var_11526_cast_fp16_1 = split(axis = var_11526_axis_0, split_sizes = var_11526_split_sizes_0, x = normed_553_cast_fp16)[name = string("op_11526_cast_fp16")]; tensor const_335_to_fp16 = const()[name = string("const_335_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2278929024)))]; tensor var_11529_cast_fp16 = mul(x = var_11526_cast_fp16_0, y = const_335_to_fp16)[name = string("op_11529_cast_fp16")]; tensor hidden_states_181_cast_fp16 = add(x = x_577_cast_fp16, y = var_11529_cast_fp16)[name = string("hidden_states_181_cast_fp16")]; tensor per_layer_slice_29_begin_0 = const()[name = string("per_layer_slice_29_begin_0"), val = tensor([0, 0, 3584])]; tensor per_layer_slice_29_end_0 = const()[name = string("per_layer_slice_29_end_0"), val = tensor([1, 1, 3840])]; tensor per_layer_slice_29_end_mask_0 = const()[name = string("per_layer_slice_29_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_29 = slice_by_index(begin = per_layer_slice_29_begin_0, end = per_layer_slice_29_end_0, end_mask = per_layer_slice_29_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_29")]; tensor gated_57 = linear(bias = linear_1_bias_0, weight = layers_14_per_layer_input_gate_weight_palettized, x = hidden_states_181_cast_fp16)[name = string("linear_29")]; string gated_59_mode_0 = const()[name = string("gated_59_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_59 = gelu(mode = gated_59_mode_0, x = gated_57)[name = string("gated_59")]; tensor input_507 = mul(x = gated_59, y = per_layer_slice_29)[name = string("input_507")]; tensor x_585 = linear(bias = linear_2_bias_0, weight = layers_14_per_layer_projection_weight_palettized, x = input_507)[name = string("linear_30")]; int32 var_11566 = const()[name = string("op_11566"), val = int32(-1)]; fp16 const_336_promoted_to_fp16 = const()[name = string("const_336_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_11572_cast_fp16 = mul(x = x_585, y = const_336_promoted_to_fp16)[name = string("op_11572_cast_fp16")]; bool input_509_interleave_0 = const()[name = string("input_509_interleave_0"), val = bool(false)]; tensor input_509_cast_fp16 = concat(axis = var_11566, interleave = input_509_interleave_0, values = (x_585, var_11572_cast_fp16))[name = string("input_509_cast_fp16")]; tensor normed_557_axes_0 = const()[name = string("normed_557_axes_0"), val = tensor([-1])]; fp16 var_11564_to_fp16 = const()[name = string("op_11564_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_557_cast_fp16 = layer_norm(axes = normed_557_axes_0, epsilon = var_11564_to_fp16, x = input_509_cast_fp16)[name = string("normed_557_cast_fp16")]; tensor var_11577_split_sizes_0 = const()[name = string("op_11577_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_11577_axis_0 = const()[name = string("op_11577_axis_0"), val = int32(-1)]; tensor var_11577_cast_fp16_0, tensor var_11577_cast_fp16_1 = split(axis = var_11577_axis_0, split_sizes = var_11577_split_sizes_0, x = normed_557_cast_fp16)[name = string("op_11577_cast_fp16")]; tensor const_337_to_fp16 = const()[name = string("const_337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2278932160)))]; tensor var_11580_cast_fp16 = mul(x = var_11577_cast_fp16_0, y = const_337_to_fp16)[name = string("op_11580_cast_fp16")]; tensor hidden_states_185_cast_fp16 = add(x = hidden_states_181_cast_fp16, y = var_11580_cast_fp16)[name = string("hidden_states_185_cast_fp16")]; tensor layers_14_layer_scalar_to_fp16 = const()[name = string("layers_14_layer_scalar_to_fp16"), val = tensor([0x1.d4p-6])]; tensor x_589_cast_fp16 = mul(x = hidden_states_185_cast_fp16, y = layers_14_layer_scalar_to_fp16)[name = string("x_589_cast_fp16")]; int32 var_11588 = const()[name = string("op_11588"), val = int32(-1)]; fp16 const_338_promoted_to_fp16 = const()[name = string("const_338_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_11594_cast_fp16 = mul(x = x_589_cast_fp16, y = const_338_promoted_to_fp16)[name = string("op_11594_cast_fp16")]; bool input_511_interleave_0 = const()[name = string("input_511_interleave_0"), val = bool(false)]; tensor input_511_cast_fp16 = concat(axis = var_11588, interleave = input_511_interleave_0, values = (x_589_cast_fp16, var_11594_cast_fp16))[name = string("input_511_cast_fp16")]; tensor normed_561_axes_0 = const()[name = string("normed_561_axes_0"), val = tensor([-1])]; fp16 var_11586_to_fp16 = const()[name = string("op_11586_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_561_cast_fp16 = layer_norm(axes = normed_561_axes_0, epsilon = var_11586_to_fp16, x = input_511_cast_fp16)[name = string("normed_561_cast_fp16")]; tensor var_11599_split_sizes_0 = const()[name = string("op_11599_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_11599_axis_0 = const()[name = string("op_11599_axis_0"), val = int32(-1)]; tensor var_11599_cast_fp16_0, tensor var_11599_cast_fp16_1 = split(axis = var_11599_axis_0, split_sizes = var_11599_split_sizes_0, x = normed_561_cast_fp16)[name = string("op_11599_cast_fp16")]; tensor const_339_to_fp16 = const()[name = string("const_339_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2278935296)))]; tensor var_11602_cast_fp16 = mul(x = var_11599_cast_fp16_0, y = const_339_to_fp16)[name = string("op_11602_cast_fp16")]; tensor var_11610 = const()[name = string("op_11610"), val = tensor([0, 2, 1])]; tensor var_11613_axes_0 = const()[name = string("op_11613_axes_0"), val = tensor([2])]; tensor var_11611_cast_fp16 = transpose(perm = var_11610, x = var_11602_cast_fp16)[name = string("transpose_141")]; tensor var_11613_cast_fp16 = expand_dims(axes = var_11613_axes_0, x = var_11611_cast_fp16)[name = string("op_11613_cast_fp16")]; string var_11629_pad_type_0 = const()[name = string("op_11629_pad_type_0"), val = string("valid")]; tensor var_11629_strides_0 = const()[name = string("op_11629_strides_0"), val = tensor([1, 1])]; tensor var_11629_pad_0 = const()[name = string("op_11629_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11629_dilations_0 = const()[name = string("op_11629_dilations_0"), val = tensor([1, 1])]; int32 var_11629_groups_0 = const()[name = string("op_11629_groups_0"), val = int32(1)]; tensor var_11629 = conv(dilations = var_11629_dilations_0, groups = var_11629_groups_0, pad = var_11629_pad_0, pad_type = var_11629_pad_type_0, strides = var_11629_strides_0, weight = layers_15_self_attn_q_proj_weight_palettized, x = var_11613_cast_fp16)[name = string("op_11629")]; tensor var_11634 = const()[name = string("op_11634"), val = tensor([1, 8, 256, 1])]; tensor var_11635 = reshape(shape = var_11634, x = var_11629)[name = string("op_11635")]; tensor var_11640 = const()[name = string("op_11640"), val = tensor([0, 1, 3, 2])]; tensor var_11650 = const()[name = string("op_11650"), val = tensor([1, 8, 256])]; tensor var_11641 = transpose(perm = var_11640, x = var_11635)[name = string("transpose_140")]; tensor x_593 = reshape(shape = var_11650, x = var_11641)[name = string("x_593")]; int32 var_11656 = const()[name = string("op_11656"), val = int32(-1)]; fp16 const_340_promoted_to_fp16 = const()[name = string("const_340_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_11662_cast_fp16 = mul(x = x_593, y = const_340_promoted_to_fp16)[name = string("op_11662_cast_fp16")]; bool input_515_interleave_0 = const()[name = string("input_515_interleave_0"), val = bool(false)]; tensor input_515_cast_fp16 = concat(axis = var_11656, interleave = input_515_interleave_0, values = (x_593, var_11662_cast_fp16))[name = string("input_515_cast_fp16")]; tensor normed_565_axes_0 = const()[name = string("normed_565_axes_0"), val = tensor([-1])]; fp16 var_11654_to_fp16 = const()[name = string("op_11654_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_565_cast_fp16 = layer_norm(axes = normed_565_axes_0, epsilon = var_11654_to_fp16, x = input_515_cast_fp16)[name = string("normed_565_cast_fp16")]; tensor var_11667_split_sizes_0 = const()[name = string("op_11667_split_sizes_0"), val = tensor([256, 256])]; int32 var_11667_axis_0 = const()[name = string("op_11667_axis_0"), val = int32(-1)]; tensor var_11667_cast_fp16_0, tensor var_11667_cast_fp16_1 = split(axis = var_11667_axis_0, split_sizes = var_11667_split_sizes_0, x = normed_565_cast_fp16)[name = string("op_11667_cast_fp16")]; tensor var_11670_cast_fp16 = mul(x = var_11667_cast_fp16_0, y = const_307_to_fp16)[name = string("op_11670_cast_fp16")]; tensor var_11676 = const()[name = string("op_11676"), val = tensor([1, 8, 1, 256])]; tensor q_123 = reshape(shape = var_11676, x = var_11670_cast_fp16)[name = string("q_123")]; tensor var_11678 = mul(x = q_123, y = cos_1)[name = string("op_11678")]; tensor var_11679_split_sizes_0 = const()[name = string("op_11679_split_sizes_0"), val = tensor([128, 128])]; int32 var_11679_axis_0 = const()[name = string("op_11679_axis_0"), val = int32(-1)]; tensor var_11679_0, tensor var_11679_1 = split(axis = var_11679_axis_0, split_sizes = var_11679_split_sizes_0, x = q_123)[name = string("op_11679")]; fp16 const_342_promoted = const()[name = string("const_342_promoted"), val = fp16(-0x1p+0)]; tensor var_11681 = mul(x = var_11679_1, y = const_342_promoted)[name = string("op_11681")]; int32 var_11683 = const()[name = string("op_11683"), val = int32(-1)]; bool var_11684_interleave_0 = const()[name = string("op_11684_interleave_0"), val = bool(false)]; tensor var_11684 = concat(axis = var_11683, interleave = var_11684_interleave_0, values = (var_11681, var_11679_0))[name = string("op_11684")]; tensor var_11685 = mul(x = var_11684, y = sin_1)[name = string("op_11685")]; tensor q_125 = add(x = var_11678, y = var_11685)[name = string("q_125")]; bool var_11709_transpose_x_0 = const()[name = string("op_11709_transpose_x_0"), val = bool(false)]; bool var_11709_transpose_y_0 = const()[name = string("op_11709_transpose_y_0"), val = bool(false)]; tensor var_11709_cast_fp16 = matmul(transpose_x = var_11709_transpose_x_0, transpose_y = var_11709_transpose_y_0, x = q_125, y = transpose_153_cast_fp16)[name = string("op_11709_cast_fp16")]; tensor attn_weights_93_cast_fp16 = add(x = var_11709_cast_fp16, y = causal_mask)[name = string("attn_weights_93_cast_fp16")]; int32 var_11719 = const()[name = string("op_11719"), val = int32(-1)]; tensor var_11721_cast_fp16 = softmax(axis = var_11719, x = attn_weights_93_cast_fp16)[name = string("op_11721_cast_fp16")]; bool var_11737_transpose_x_0 = const()[name = string("op_11737_transpose_x_0"), val = bool(false)]; bool var_11737_transpose_y_0 = const()[name = string("op_11737_transpose_y_0"), val = bool(false)]; tensor var_11737_cast_fp16 = matmul(transpose_x = var_11737_transpose_x_0, transpose_y = var_11737_transpose_y_0, x = var_11721_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_11737_cast_fp16")]; tensor var_11747 = const()[name = string("op_11747"), val = tensor([0, 2, 1, 3])]; tensor var_11754 = const()[name = string("op_11754"), val = tensor([1, 1, -1])]; tensor var_11748 = transpose(perm = var_11747, x = var_11737_cast_fp16)[name = string("transpose_139")]; tensor attn_output_93 = reshape(shape = var_11754, x = var_11748)[name = string("attn_output_93")]; tensor var_11759 = const()[name = string("op_11759"), val = tensor([0, 2, 1])]; tensor squeeze_15_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2278938432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2280511360))))[name = string("squeeze_15_palettized")]; string var_11775_pad_type_0 = const()[name = string("op_11775_pad_type_0"), val = string("valid")]; int32 var_11775_groups_0 = const()[name = string("op_11775_groups_0"), val = int32(1)]; tensor var_11775_strides_0 = const()[name = string("op_11775_strides_0"), val = tensor([1])]; tensor var_11775_pad_0 = const()[name = string("op_11775_pad_0"), val = tensor([0, 0])]; tensor var_11775_dilations_0 = const()[name = string("op_11775_dilations_0"), val = tensor([1])]; tensor var_11760 = transpose(perm = var_11759, x = attn_output_93)[name = string("transpose_138")]; tensor var_11775 = conv(dilations = var_11775_dilations_0, groups = var_11775_groups_0, pad = var_11775_pad_0, pad_type = var_11775_pad_type_0, strides = var_11775_strides_0, weight = squeeze_15_palettized, x = var_11760)[name = string("op_11775")]; tensor var_11779 = const()[name = string("op_11779"), val = tensor([0, 2, 1])]; int32 var_11785 = const()[name = string("op_11785"), val = int32(-1)]; fp16 const_343_promoted_to_fp16 = const()[name = string("const_343_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_597 = transpose(perm = var_11779, x = var_11775)[name = string("transpose_137")]; tensor var_11791_cast_fp16 = mul(x = x_597, y = const_343_promoted_to_fp16)[name = string("op_11791_cast_fp16")]; bool input_519_interleave_0 = const()[name = string("input_519_interleave_0"), val = bool(false)]; tensor input_519_cast_fp16 = concat(axis = var_11785, interleave = input_519_interleave_0, values = (x_597, var_11791_cast_fp16))[name = string("input_519_cast_fp16")]; tensor normed_569_axes_0 = const()[name = string("normed_569_axes_0"), val = tensor([-1])]; fp16 var_11783_to_fp16 = const()[name = string("op_11783_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_569_cast_fp16 = layer_norm(axes = normed_569_axes_0, epsilon = var_11783_to_fp16, x = input_519_cast_fp16)[name = string("normed_569_cast_fp16")]; tensor var_11796_split_sizes_0 = const()[name = string("op_11796_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_11796_axis_0 = const()[name = string("op_11796_axis_0"), val = int32(-1)]; tensor var_11796_cast_fp16_0, tensor var_11796_cast_fp16_1 = split(axis = var_11796_axis_0, split_sizes = var_11796_split_sizes_0, x = normed_569_cast_fp16)[name = string("op_11796_cast_fp16")]; tensor const_344_to_fp16 = const()[name = string("const_344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2280512960)))]; tensor var_11799_cast_fp16 = mul(x = var_11796_cast_fp16_0, y = const_344_to_fp16)[name = string("op_11799_cast_fp16")]; tensor x_601_cast_fp16 = add(x = x_589_cast_fp16, y = var_11799_cast_fp16)[name = string("x_601_cast_fp16")]; int32 var_11806 = const()[name = string("op_11806"), val = int32(-1)]; fp16 const_345_promoted_to_fp16 = const()[name = string("const_345_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_11812_cast_fp16 = mul(x = x_601_cast_fp16, y = const_345_promoted_to_fp16)[name = string("op_11812_cast_fp16")]; bool input_521_interleave_0 = const()[name = string("input_521_interleave_0"), val = bool(false)]; tensor input_521_cast_fp16 = concat(axis = var_11806, interleave = input_521_interleave_0, values = (x_601_cast_fp16, var_11812_cast_fp16))[name = string("input_521_cast_fp16")]; tensor normed_573_axes_0 = const()[name = string("normed_573_axes_0"), val = tensor([-1])]; fp16 var_11804_to_fp16 = const()[name = string("op_11804_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_573_cast_fp16 = layer_norm(axes = normed_573_axes_0, epsilon = var_11804_to_fp16, x = input_521_cast_fp16)[name = string("normed_573_cast_fp16")]; tensor var_11817_split_sizes_0 = const()[name = string("op_11817_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_11817_axis_0 = const()[name = string("op_11817_axis_0"), val = int32(-1)]; tensor var_11817_cast_fp16_0, tensor var_11817_cast_fp16_1 = split(axis = var_11817_axis_0, split_sizes = var_11817_split_sizes_0, x = normed_573_cast_fp16)[name = string("op_11817_cast_fp16")]; tensor const_346_to_fp16 = const()[name = string("const_346_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2280516096)))]; tensor var_11820_cast_fp16 = mul(x = var_11817_cast_fp16_0, y = const_346_to_fp16)[name = string("op_11820_cast_fp16")]; tensor var_11833 = const()[name = string("op_11833"), val = tensor([0, 2, 1])]; tensor input_523_axes_0 = const()[name = string("input_523_axes_0"), val = tensor([2])]; tensor var_11834 = transpose(perm = var_11833, x = var_11820_cast_fp16)[name = string("transpose_136")]; tensor input_523 = expand_dims(axes = input_523_axes_0, x = var_11834)[name = string("input_523")]; string gate_61_pad_type_0 = const()[name = string("gate_61_pad_type_0"), val = string("valid")]; tensor gate_61_strides_0 = const()[name = string("gate_61_strides_0"), val = tensor([1, 1])]; tensor gate_61_pad_0 = const()[name = string("gate_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_61_dilations_0 = const()[name = string("gate_61_dilations_0"), val = tensor([1, 1])]; int32 gate_61_groups_0 = const()[name = string("gate_61_groups_0"), val = int32(1)]; tensor gate_61 = conv(dilations = gate_61_dilations_0, groups = gate_61_groups_0, pad = gate_61_pad_0, pad_type = gate_61_pad_type_0, strides = gate_61_strides_0, weight = layers_15_mlp_gate_proj_weight_palettized, x = input_523)[name = string("gate_61")]; string up_31_pad_type_0 = const()[name = string("up_31_pad_type_0"), val = string("valid")]; tensor up_31_strides_0 = const()[name = string("up_31_strides_0"), val = tensor([1, 1])]; tensor up_31_pad_0 = const()[name = string("up_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_31_dilations_0 = const()[name = string("up_31_dilations_0"), val = tensor([1, 1])]; int32 up_31_groups_0 = const()[name = string("up_31_groups_0"), val = int32(1)]; tensor up_31 = conv(dilations = up_31_dilations_0, groups = up_31_groups_0, pad = up_31_pad_0, pad_type = up_31_pad_type_0, strides = up_31_strides_0, weight = layers_15_mlp_up_proj_weight_palettized, x = input_523)[name = string("up_31")]; string gate_63_mode_0 = const()[name = string("gate_63_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_63 = gelu(mode = gate_63_mode_0, x = gate_61)[name = string("gate_63")]; tensor input_525 = mul(x = gate_63, y = up_31)[name = string("input_525")]; string mlp_out_31_pad_type_0 = const()[name = string("mlp_out_31_pad_type_0"), val = string("valid")]; tensor mlp_out_31_strides_0 = const()[name = string("mlp_out_31_strides_0"), val = tensor([1, 1])]; tensor mlp_out_31_pad_0 = const()[name = string("mlp_out_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_31_dilations_0 = const()[name = string("mlp_out_31_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_31_groups_0 = const()[name = string("mlp_out_31_groups_0"), val = int32(1)]; tensor mlp_out_31 = conv(dilations = mlp_out_31_dilations_0, groups = mlp_out_31_groups_0, pad = mlp_out_31_pad_0, pad_type = mlp_out_31_pad_type_0, strides = mlp_out_31_strides_0, weight = layers_15_mlp_down_proj_weight_palettized, x = input_525)[name = string("mlp_out_31")]; tensor var_11874_axes_0 = const()[name = string("op_11874_axes_0"), val = tensor([2])]; tensor var_11874 = squeeze(axes = var_11874_axes_0, x = mlp_out_31)[name = string("op_11874")]; tensor var_11878 = const()[name = string("op_11878"), val = tensor([0, 2, 1])]; int32 var_11884 = const()[name = string("op_11884"), val = int32(-1)]; fp16 const_347_promoted_to_fp16 = const()[name = string("const_347_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_605 = transpose(perm = var_11878, x = var_11874)[name = string("transpose_135")]; tensor var_11890_cast_fp16 = mul(x = x_605, y = const_347_promoted_to_fp16)[name = string("op_11890_cast_fp16")]; bool input_527_interleave_0 = const()[name = string("input_527_interleave_0"), val = bool(false)]; tensor input_527_cast_fp16 = concat(axis = var_11884, interleave = input_527_interleave_0, values = (x_605, var_11890_cast_fp16))[name = string("input_527_cast_fp16")]; tensor normed_577_axes_0 = const()[name = string("normed_577_axes_0"), val = tensor([-1])]; fp16 var_11882_to_fp16 = const()[name = string("op_11882_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_577_cast_fp16 = layer_norm(axes = normed_577_axes_0, epsilon = var_11882_to_fp16, x = input_527_cast_fp16)[name = string("normed_577_cast_fp16")]; tensor var_11895_split_sizes_0 = const()[name = string("op_11895_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_11895_axis_0 = const()[name = string("op_11895_axis_0"), val = int32(-1)]; tensor var_11895_cast_fp16_0, tensor var_11895_cast_fp16_1 = split(axis = var_11895_axis_0, split_sizes = var_11895_split_sizes_0, x = normed_577_cast_fp16)[name = string("op_11895_cast_fp16")]; tensor const_348_to_fp16 = const()[name = string("const_348_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2280519232)))]; tensor var_11898_cast_fp16 = mul(x = var_11895_cast_fp16_0, y = const_348_to_fp16)[name = string("op_11898_cast_fp16")]; tensor hidden_states_193_cast_fp16 = add(x = x_601_cast_fp16, y = var_11898_cast_fp16)[name = string("hidden_states_193_cast_fp16")]; tensor per_layer_slice_31_begin_0 = const()[name = string("per_layer_slice_31_begin_0"), val = tensor([0, 0, 3840])]; tensor per_layer_slice_31_end_0 = const()[name = string("per_layer_slice_31_end_0"), val = tensor([1, 1, 4096])]; tensor per_layer_slice_31_end_mask_0 = const()[name = string("per_layer_slice_31_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_31 = slice_by_index(begin = per_layer_slice_31_begin_0, end = per_layer_slice_31_end_0, end_mask = per_layer_slice_31_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_31")]; tensor gated_61 = linear(bias = linear_1_bias_0, weight = layers_15_per_layer_input_gate_weight_palettized, x = hidden_states_193_cast_fp16)[name = string("linear_31")]; string gated_63_mode_0 = const()[name = string("gated_63_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_63 = gelu(mode = gated_63_mode_0, x = gated_61)[name = string("gated_63")]; tensor input_531 = mul(x = gated_63, y = per_layer_slice_31)[name = string("input_531")]; tensor x_609 = linear(bias = linear_2_bias_0, weight = layers_15_per_layer_projection_weight_palettized, x = input_531)[name = string("linear_32")]; int32 var_11935 = const()[name = string("op_11935"), val = int32(-1)]; fp16 const_349_promoted_to_fp16 = const()[name = string("const_349_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_11941_cast_fp16 = mul(x = x_609, y = const_349_promoted_to_fp16)[name = string("op_11941_cast_fp16")]; bool input_533_interleave_0 = const()[name = string("input_533_interleave_0"), val = bool(false)]; tensor input_533_cast_fp16 = concat(axis = var_11935, interleave = input_533_interleave_0, values = (x_609, var_11941_cast_fp16))[name = string("input_533_cast_fp16")]; tensor normed_581_axes_0 = const()[name = string("normed_581_axes_0"), val = tensor([-1])]; fp16 var_11933_to_fp16 = const()[name = string("op_11933_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_581_cast_fp16 = layer_norm(axes = normed_581_axes_0, epsilon = var_11933_to_fp16, x = input_533_cast_fp16)[name = string("normed_581_cast_fp16")]; tensor var_11946_split_sizes_0 = const()[name = string("op_11946_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_11946_axis_0 = const()[name = string("op_11946_axis_0"), val = int32(-1)]; tensor var_11946_cast_fp16_0, tensor var_11946_cast_fp16_1 = split(axis = var_11946_axis_0, split_sizes = var_11946_split_sizes_0, x = normed_581_cast_fp16)[name = string("op_11946_cast_fp16")]; tensor const_350_to_fp16 = const()[name = string("const_350_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2280522368)))]; tensor var_11949_cast_fp16 = mul(x = var_11946_cast_fp16_0, y = const_350_to_fp16)[name = string("op_11949_cast_fp16")]; tensor hidden_states_197_cast_fp16 = add(x = hidden_states_193_cast_fp16, y = var_11949_cast_fp16)[name = string("hidden_states_197_cast_fp16")]; tensor layers_15_layer_scalar_to_fp16 = const()[name = string("layers_15_layer_scalar_to_fp16"), val = tensor([0x1.04p-2])]; tensor x_613_cast_fp16 = mul(x = hidden_states_197_cast_fp16, y = layers_15_layer_scalar_to_fp16)[name = string("x_613_cast_fp16")]; int32 var_11957 = const()[name = string("op_11957"), val = int32(-1)]; fp16 const_351_promoted_to_fp16 = const()[name = string("const_351_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_11963_cast_fp16 = mul(x = x_613_cast_fp16, y = const_351_promoted_to_fp16)[name = string("op_11963_cast_fp16")]; bool input_535_interleave_0 = const()[name = string("input_535_interleave_0"), val = bool(false)]; tensor input_535_cast_fp16 = concat(axis = var_11957, interleave = input_535_interleave_0, values = (x_613_cast_fp16, var_11963_cast_fp16))[name = string("input_535_cast_fp16")]; tensor normed_585_axes_0 = const()[name = string("normed_585_axes_0"), val = tensor([-1])]; fp16 var_11955_to_fp16 = const()[name = string("op_11955_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_585_cast_fp16 = layer_norm(axes = normed_585_axes_0, epsilon = var_11955_to_fp16, x = input_535_cast_fp16)[name = string("normed_585_cast_fp16")]; tensor var_11968_split_sizes_0 = const()[name = string("op_11968_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_11968_axis_0 = const()[name = string("op_11968_axis_0"), val = int32(-1)]; tensor var_11968_cast_fp16_0, tensor var_11968_cast_fp16_1 = split(axis = var_11968_axis_0, split_sizes = var_11968_split_sizes_0, x = normed_585_cast_fp16)[name = string("op_11968_cast_fp16")]; tensor const_352_to_fp16 = const()[name = string("const_352_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2280525504)))]; tensor var_11971_cast_fp16 = mul(x = var_11968_cast_fp16_0, y = const_352_to_fp16)[name = string("op_11971_cast_fp16")]; tensor var_11979 = const()[name = string("op_11979"), val = tensor([0, 2, 1])]; tensor var_11982_axes_0 = const()[name = string("op_11982_axes_0"), val = tensor([2])]; tensor var_11980_cast_fp16 = transpose(perm = var_11979, x = var_11971_cast_fp16)[name = string("transpose_134")]; tensor var_11982_cast_fp16 = expand_dims(axes = var_11982_axes_0, x = var_11980_cast_fp16)[name = string("op_11982_cast_fp16")]; string var_11998_pad_type_0 = const()[name = string("op_11998_pad_type_0"), val = string("valid")]; tensor var_11998_strides_0 = const()[name = string("op_11998_strides_0"), val = tensor([1, 1])]; tensor var_11998_pad_0 = const()[name = string("op_11998_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11998_dilations_0 = const()[name = string("op_11998_dilations_0"), val = tensor([1, 1])]; int32 var_11998_groups_0 = const()[name = string("op_11998_groups_0"), val = int32(1)]; tensor var_11998 = conv(dilations = var_11998_dilations_0, groups = var_11998_groups_0, pad = var_11998_pad_0, pad_type = var_11998_pad_type_0, strides = var_11998_strides_0, weight = layers_16_self_attn_q_proj_weight_palettized, x = var_11982_cast_fp16)[name = string("op_11998")]; tensor var_12003 = const()[name = string("op_12003"), val = tensor([1, 8, 256, 1])]; tensor var_12004 = reshape(shape = var_12003, x = var_11998)[name = string("op_12004")]; tensor var_12009 = const()[name = string("op_12009"), val = tensor([0, 1, 3, 2])]; tensor var_12019 = const()[name = string("op_12019"), val = tensor([1, 8, 256])]; tensor var_12010 = transpose(perm = var_12009, x = var_12004)[name = string("transpose_133")]; tensor x_617 = reshape(shape = var_12019, x = var_12010)[name = string("x_617")]; int32 var_12025 = const()[name = string("op_12025"), val = int32(-1)]; fp16 const_353_promoted_to_fp16 = const()[name = string("const_353_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_12031_cast_fp16 = mul(x = x_617, y = const_353_promoted_to_fp16)[name = string("op_12031_cast_fp16")]; bool input_539_interleave_0 = const()[name = string("input_539_interleave_0"), val = bool(false)]; tensor input_539_cast_fp16 = concat(axis = var_12025, interleave = input_539_interleave_0, values = (x_617, var_12031_cast_fp16))[name = string("input_539_cast_fp16")]; tensor normed_589_axes_0 = const()[name = string("normed_589_axes_0"), val = tensor([-1])]; fp16 var_12023_to_fp16 = const()[name = string("op_12023_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_589_cast_fp16 = layer_norm(axes = normed_589_axes_0, epsilon = var_12023_to_fp16, x = input_539_cast_fp16)[name = string("normed_589_cast_fp16")]; tensor var_12036_split_sizes_0 = const()[name = string("op_12036_split_sizes_0"), val = tensor([256, 256])]; int32 var_12036_axis_0 = const()[name = string("op_12036_axis_0"), val = int32(-1)]; tensor var_12036_cast_fp16_0, tensor var_12036_cast_fp16_1 = split(axis = var_12036_axis_0, split_sizes = var_12036_split_sizes_0, x = normed_589_cast_fp16)[name = string("op_12036_cast_fp16")]; tensor var_12039_cast_fp16 = mul(x = var_12036_cast_fp16_0, y = const_307_to_fp16)[name = string("op_12039_cast_fp16")]; tensor var_12045 = const()[name = string("op_12045"), val = tensor([1, 8, 1, 256])]; tensor q_129 = reshape(shape = var_12045, x = var_12039_cast_fp16)[name = string("q_129")]; tensor var_12047 = mul(x = q_129, y = cos_1)[name = string("op_12047")]; tensor var_12048_split_sizes_0 = const()[name = string("op_12048_split_sizes_0"), val = tensor([128, 128])]; int32 var_12048_axis_0 = const()[name = string("op_12048_axis_0"), val = int32(-1)]; tensor var_12048_0, tensor var_12048_1 = split(axis = var_12048_axis_0, split_sizes = var_12048_split_sizes_0, x = q_129)[name = string("op_12048")]; fp16 const_355_promoted = const()[name = string("const_355_promoted"), val = fp16(-0x1p+0)]; tensor var_12050 = mul(x = var_12048_1, y = const_355_promoted)[name = string("op_12050")]; int32 var_12052 = const()[name = string("op_12052"), val = int32(-1)]; bool var_12053_interleave_0 = const()[name = string("op_12053_interleave_0"), val = bool(false)]; tensor var_12053 = concat(axis = var_12052, interleave = var_12053_interleave_0, values = (var_12050, var_12048_0))[name = string("op_12053")]; tensor var_12054 = mul(x = var_12053, y = sin_1)[name = string("op_12054")]; tensor q_131 = add(x = var_12047, y = var_12054)[name = string("q_131")]; bool var_12078_transpose_x_0 = const()[name = string("op_12078_transpose_x_0"), val = bool(false)]; bool var_12078_transpose_y_0 = const()[name = string("op_12078_transpose_y_0"), val = bool(false)]; tensor var_12078_cast_fp16 = matmul(transpose_x = var_12078_transpose_x_0, transpose_y = var_12078_transpose_y_0, x = q_131, y = transpose_153_cast_fp16)[name = string("op_12078_cast_fp16")]; tensor attn_weights_99_cast_fp16 = add(x = var_12078_cast_fp16, y = causal_mask)[name = string("attn_weights_99_cast_fp16")]; int32 var_12088 = const()[name = string("op_12088"), val = int32(-1)]; tensor var_12090_cast_fp16 = softmax(axis = var_12088, x = attn_weights_99_cast_fp16)[name = string("op_12090_cast_fp16")]; bool var_12106_transpose_x_0 = const()[name = string("op_12106_transpose_x_0"), val = bool(false)]; bool var_12106_transpose_y_0 = const()[name = string("op_12106_transpose_y_0"), val = bool(false)]; tensor var_12106_cast_fp16 = matmul(transpose_x = var_12106_transpose_x_0, transpose_y = var_12106_transpose_y_0, x = var_12090_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_12106_cast_fp16")]; tensor var_12116 = const()[name = string("op_12116"), val = tensor([0, 2, 1, 3])]; tensor var_12123 = const()[name = string("op_12123"), val = tensor([1, 1, -1])]; tensor var_12117 = transpose(perm = var_12116, x = var_12106_cast_fp16)[name = string("transpose_132")]; tensor attn_output_99 = reshape(shape = var_12123, x = var_12117)[name = string("attn_output_99")]; tensor var_12128 = const()[name = string("op_12128"), val = tensor([0, 2, 1])]; tensor squeeze_16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2280528640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2282101568))))[name = string("squeeze_16_palettized")]; string var_12144_pad_type_0 = const()[name = string("op_12144_pad_type_0"), val = string("valid")]; int32 var_12144_groups_0 = const()[name = string("op_12144_groups_0"), val = int32(1)]; tensor var_12144_strides_0 = const()[name = string("op_12144_strides_0"), val = tensor([1])]; tensor var_12144_pad_0 = const()[name = string("op_12144_pad_0"), val = tensor([0, 0])]; tensor var_12144_dilations_0 = const()[name = string("op_12144_dilations_0"), val = tensor([1])]; tensor var_12129 = transpose(perm = var_12128, x = attn_output_99)[name = string("transpose_131")]; tensor var_12144 = conv(dilations = var_12144_dilations_0, groups = var_12144_groups_0, pad = var_12144_pad_0, pad_type = var_12144_pad_type_0, strides = var_12144_strides_0, weight = squeeze_16_palettized, x = var_12129)[name = string("op_12144")]; tensor var_12148 = const()[name = string("op_12148"), val = tensor([0, 2, 1])]; int32 var_12154 = const()[name = string("op_12154"), val = int32(-1)]; fp16 const_356_promoted_to_fp16 = const()[name = string("const_356_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_621 = transpose(perm = var_12148, x = var_12144)[name = string("transpose_130")]; tensor var_12160_cast_fp16 = mul(x = x_621, y = const_356_promoted_to_fp16)[name = string("op_12160_cast_fp16")]; bool input_543_interleave_0 = const()[name = string("input_543_interleave_0"), val = bool(false)]; tensor input_543_cast_fp16 = concat(axis = var_12154, interleave = input_543_interleave_0, values = (x_621, var_12160_cast_fp16))[name = string("input_543_cast_fp16")]; tensor normed_593_axes_0 = const()[name = string("normed_593_axes_0"), val = tensor([-1])]; fp16 var_12152_to_fp16 = const()[name = string("op_12152_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_593_cast_fp16 = layer_norm(axes = normed_593_axes_0, epsilon = var_12152_to_fp16, x = input_543_cast_fp16)[name = string("normed_593_cast_fp16")]; tensor var_12165_split_sizes_0 = const()[name = string("op_12165_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_12165_axis_0 = const()[name = string("op_12165_axis_0"), val = int32(-1)]; tensor var_12165_cast_fp16_0, tensor var_12165_cast_fp16_1 = split(axis = var_12165_axis_0, split_sizes = var_12165_split_sizes_0, x = normed_593_cast_fp16)[name = string("op_12165_cast_fp16")]; tensor const_357_to_fp16 = const()[name = string("const_357_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2282103168)))]; tensor var_12168_cast_fp16 = mul(x = var_12165_cast_fp16_0, y = const_357_to_fp16)[name = string("op_12168_cast_fp16")]; tensor x_625_cast_fp16 = add(x = x_613_cast_fp16, y = var_12168_cast_fp16)[name = string("x_625_cast_fp16")]; int32 var_12175 = const()[name = string("op_12175"), val = int32(-1)]; fp16 const_358_promoted_to_fp16 = const()[name = string("const_358_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_12181_cast_fp16 = mul(x = x_625_cast_fp16, y = const_358_promoted_to_fp16)[name = string("op_12181_cast_fp16")]; bool input_545_interleave_0 = const()[name = string("input_545_interleave_0"), val = bool(false)]; tensor input_545_cast_fp16 = concat(axis = var_12175, interleave = input_545_interleave_0, values = (x_625_cast_fp16, var_12181_cast_fp16))[name = string("input_545_cast_fp16")]; tensor normed_597_axes_0 = const()[name = string("normed_597_axes_0"), val = tensor([-1])]; fp16 var_12173_to_fp16 = const()[name = string("op_12173_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_597_cast_fp16 = layer_norm(axes = normed_597_axes_0, epsilon = var_12173_to_fp16, x = input_545_cast_fp16)[name = string("normed_597_cast_fp16")]; tensor var_12186_split_sizes_0 = const()[name = string("op_12186_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_12186_axis_0 = const()[name = string("op_12186_axis_0"), val = int32(-1)]; tensor var_12186_cast_fp16_0, tensor var_12186_cast_fp16_1 = split(axis = var_12186_axis_0, split_sizes = var_12186_split_sizes_0, x = normed_597_cast_fp16)[name = string("op_12186_cast_fp16")]; tensor const_359_to_fp16 = const()[name = string("const_359_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2282106304)))]; tensor var_12189_cast_fp16 = mul(x = var_12186_cast_fp16_0, y = const_359_to_fp16)[name = string("op_12189_cast_fp16")]; tensor var_12202 = const()[name = string("op_12202"), val = tensor([0, 2, 1])]; tensor input_547_axes_0 = const()[name = string("input_547_axes_0"), val = tensor([2])]; tensor var_12203 = transpose(perm = var_12202, x = var_12189_cast_fp16)[name = string("transpose_129")]; tensor input_547 = expand_dims(axes = input_547_axes_0, x = var_12203)[name = string("input_547")]; string gate_65_pad_type_0 = const()[name = string("gate_65_pad_type_0"), val = string("valid")]; tensor gate_65_strides_0 = const()[name = string("gate_65_strides_0"), val = tensor([1, 1])]; tensor gate_65_pad_0 = const()[name = string("gate_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_65_dilations_0 = const()[name = string("gate_65_dilations_0"), val = tensor([1, 1])]; int32 gate_65_groups_0 = const()[name = string("gate_65_groups_0"), val = int32(1)]; tensor gate_65 = conv(dilations = gate_65_dilations_0, groups = gate_65_groups_0, pad = gate_65_pad_0, pad_type = gate_65_pad_type_0, strides = gate_65_strides_0, weight = layers_16_mlp_gate_proj_weight_palettized, x = input_547)[name = string("gate_65")]; string up_33_pad_type_0 = const()[name = string("up_33_pad_type_0"), val = string("valid")]; tensor up_33_strides_0 = const()[name = string("up_33_strides_0"), val = tensor([1, 1])]; tensor up_33_pad_0 = const()[name = string("up_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_33_dilations_0 = const()[name = string("up_33_dilations_0"), val = tensor([1, 1])]; int32 up_33_groups_0 = const()[name = string("up_33_groups_0"), val = int32(1)]; tensor up_33 = conv(dilations = up_33_dilations_0, groups = up_33_groups_0, pad = up_33_pad_0, pad_type = up_33_pad_type_0, strides = up_33_strides_0, weight = layers_16_mlp_up_proj_weight_palettized, x = input_547)[name = string("up_33")]; string gate_67_mode_0 = const()[name = string("gate_67_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_67 = gelu(mode = gate_67_mode_0, x = gate_65)[name = string("gate_67")]; tensor input_549 = mul(x = gate_67, y = up_33)[name = string("input_549")]; string mlp_out_33_pad_type_0 = const()[name = string("mlp_out_33_pad_type_0"), val = string("valid")]; tensor mlp_out_33_strides_0 = const()[name = string("mlp_out_33_strides_0"), val = tensor([1, 1])]; tensor mlp_out_33_pad_0 = const()[name = string("mlp_out_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_33_dilations_0 = const()[name = string("mlp_out_33_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_33_groups_0 = const()[name = string("mlp_out_33_groups_0"), val = int32(1)]; tensor mlp_out_33 = conv(dilations = mlp_out_33_dilations_0, groups = mlp_out_33_groups_0, pad = mlp_out_33_pad_0, pad_type = mlp_out_33_pad_type_0, strides = mlp_out_33_strides_0, weight = layers_16_mlp_down_proj_weight_palettized, x = input_549)[name = string("mlp_out_33")]; tensor var_12243_axes_0 = const()[name = string("op_12243_axes_0"), val = tensor([2])]; tensor var_12243 = squeeze(axes = var_12243_axes_0, x = mlp_out_33)[name = string("op_12243")]; tensor var_12247 = const()[name = string("op_12247"), val = tensor([0, 2, 1])]; int32 var_12253 = const()[name = string("op_12253"), val = int32(-1)]; fp16 const_360_promoted_to_fp16 = const()[name = string("const_360_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_629 = transpose(perm = var_12247, x = var_12243)[name = string("transpose_128")]; tensor var_12259_cast_fp16 = mul(x = x_629, y = const_360_promoted_to_fp16)[name = string("op_12259_cast_fp16")]; bool input_551_interleave_0 = const()[name = string("input_551_interleave_0"), val = bool(false)]; tensor input_551_cast_fp16 = concat(axis = var_12253, interleave = input_551_interleave_0, values = (x_629, var_12259_cast_fp16))[name = string("input_551_cast_fp16")]; tensor normed_601_axes_0 = const()[name = string("normed_601_axes_0"), val = tensor([-1])]; fp16 var_12251_to_fp16 = const()[name = string("op_12251_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_601_cast_fp16 = layer_norm(axes = normed_601_axes_0, epsilon = var_12251_to_fp16, x = input_551_cast_fp16)[name = string("normed_601_cast_fp16")]; tensor var_12264_split_sizes_0 = const()[name = string("op_12264_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_12264_axis_0 = const()[name = string("op_12264_axis_0"), val = int32(-1)]; tensor var_12264_cast_fp16_0, tensor var_12264_cast_fp16_1 = split(axis = var_12264_axis_0, split_sizes = var_12264_split_sizes_0, x = normed_601_cast_fp16)[name = string("op_12264_cast_fp16")]; tensor const_361_to_fp16 = const()[name = string("const_361_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2282109440)))]; tensor var_12267_cast_fp16 = mul(x = var_12264_cast_fp16_0, y = const_361_to_fp16)[name = string("op_12267_cast_fp16")]; tensor hidden_states_205_cast_fp16 = add(x = x_625_cast_fp16, y = var_12267_cast_fp16)[name = string("hidden_states_205_cast_fp16")]; tensor per_layer_slice_33_begin_0 = const()[name = string("per_layer_slice_33_begin_0"), val = tensor([0, 0, 4096])]; tensor per_layer_slice_33_end_0 = const()[name = string("per_layer_slice_33_end_0"), val = tensor([1, 1, 4352])]; tensor per_layer_slice_33_end_mask_0 = const()[name = string("per_layer_slice_33_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_33 = slice_by_index(begin = per_layer_slice_33_begin_0, end = per_layer_slice_33_end_0, end_mask = per_layer_slice_33_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_33")]; tensor gated_65 = linear(bias = linear_1_bias_0, weight = layers_16_per_layer_input_gate_weight_palettized, x = hidden_states_205_cast_fp16)[name = string("linear_33")]; string gated_67_mode_0 = const()[name = string("gated_67_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_67 = gelu(mode = gated_67_mode_0, x = gated_65)[name = string("gated_67")]; tensor input_555 = mul(x = gated_67, y = per_layer_slice_33)[name = string("input_555")]; tensor x_633 = linear(bias = linear_2_bias_0, weight = layers_16_per_layer_projection_weight_palettized, x = input_555)[name = string("linear_34")]; int32 var_12304 = const()[name = string("op_12304"), val = int32(-1)]; fp16 const_362_promoted_to_fp16 = const()[name = string("const_362_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_12310_cast_fp16 = mul(x = x_633, y = const_362_promoted_to_fp16)[name = string("op_12310_cast_fp16")]; bool input_557_interleave_0 = const()[name = string("input_557_interleave_0"), val = bool(false)]; tensor input_557_cast_fp16 = concat(axis = var_12304, interleave = input_557_interleave_0, values = (x_633, var_12310_cast_fp16))[name = string("input_557_cast_fp16")]; tensor normed_605_axes_0 = const()[name = string("normed_605_axes_0"), val = tensor([-1])]; fp16 var_12302_to_fp16 = const()[name = string("op_12302_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_605_cast_fp16 = layer_norm(axes = normed_605_axes_0, epsilon = var_12302_to_fp16, x = input_557_cast_fp16)[name = string("normed_605_cast_fp16")]; tensor var_12315_split_sizes_0 = const()[name = string("op_12315_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_12315_axis_0 = const()[name = string("op_12315_axis_0"), val = int32(-1)]; tensor var_12315_cast_fp16_0, tensor var_12315_cast_fp16_1 = split(axis = var_12315_axis_0, split_sizes = var_12315_split_sizes_0, x = normed_605_cast_fp16)[name = string("op_12315_cast_fp16")]; tensor const_363_to_fp16 = const()[name = string("const_363_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2282112576)))]; tensor var_12318_cast_fp16 = mul(x = var_12315_cast_fp16_0, y = const_363_to_fp16)[name = string("op_12318_cast_fp16")]; tensor hidden_states_209_cast_fp16 = add(x = hidden_states_205_cast_fp16, y = var_12318_cast_fp16)[name = string("hidden_states_209_cast_fp16")]; tensor layers_16_layer_scalar_to_fp16 = const()[name = string("layers_16_layer_scalar_to_fp16"), val = tensor([0x1.2cp-1])]; tensor x_637_cast_fp16 = mul(x = hidden_states_209_cast_fp16, y = layers_16_layer_scalar_to_fp16)[name = string("x_637_cast_fp16")]; int32 var_12326 = const()[name = string("op_12326"), val = int32(-1)]; fp16 const_364_promoted_to_fp16 = const()[name = string("const_364_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_12332_cast_fp16 = mul(x = x_637_cast_fp16, y = const_364_promoted_to_fp16)[name = string("op_12332_cast_fp16")]; bool input_559_interleave_0 = const()[name = string("input_559_interleave_0"), val = bool(false)]; tensor input_559_cast_fp16 = concat(axis = var_12326, interleave = input_559_interleave_0, values = (x_637_cast_fp16, var_12332_cast_fp16))[name = string("input_559_cast_fp16")]; tensor normed_609_axes_0 = const()[name = string("normed_609_axes_0"), val = tensor([-1])]; fp16 var_12324_to_fp16 = const()[name = string("op_12324_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_609_cast_fp16 = layer_norm(axes = normed_609_axes_0, epsilon = var_12324_to_fp16, x = input_559_cast_fp16)[name = string("normed_609_cast_fp16")]; tensor var_12337_split_sizes_0 = const()[name = string("op_12337_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_12337_axis_0 = const()[name = string("op_12337_axis_0"), val = int32(-1)]; tensor var_12337_cast_fp16_0, tensor var_12337_cast_fp16_1 = split(axis = var_12337_axis_0, split_sizes = var_12337_split_sizes_0, x = normed_609_cast_fp16)[name = string("op_12337_cast_fp16")]; tensor const_365_to_fp16 = const()[name = string("const_365_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2282115712)))]; tensor var_12340_cast_fp16 = mul(x = var_12337_cast_fp16_0, y = const_365_to_fp16)[name = string("op_12340_cast_fp16")]; tensor var_12348 = const()[name = string("op_12348"), val = tensor([0, 2, 1])]; tensor var_12351_axes_0 = const()[name = string("op_12351_axes_0"), val = tensor([2])]; tensor var_12349_cast_fp16 = transpose(perm = var_12348, x = var_12340_cast_fp16)[name = string("transpose_127")]; tensor var_12351_cast_fp16 = expand_dims(axes = var_12351_axes_0, x = var_12349_cast_fp16)[name = string("op_12351_cast_fp16")]; string var_12367_pad_type_0 = const()[name = string("op_12367_pad_type_0"), val = string("valid")]; tensor var_12367_strides_0 = const()[name = string("op_12367_strides_0"), val = tensor([1, 1])]; tensor var_12367_pad_0 = const()[name = string("op_12367_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12367_dilations_0 = const()[name = string("op_12367_dilations_0"), val = tensor([1, 1])]; int32 var_12367_groups_0 = const()[name = string("op_12367_groups_0"), val = int32(1)]; tensor var_12367 = conv(dilations = var_12367_dilations_0, groups = var_12367_groups_0, pad = var_12367_pad_0, pad_type = var_12367_pad_type_0, strides = var_12367_strides_0, weight = layers_17_self_attn_q_proj_weight_palettized, x = var_12351_cast_fp16)[name = string("op_12367")]; tensor var_12372 = const()[name = string("op_12372"), val = tensor([1, 8, 256, 1])]; tensor var_12373 = reshape(shape = var_12372, x = var_12367)[name = string("op_12373")]; tensor var_12378 = const()[name = string("op_12378"), val = tensor([0, 1, 3, 2])]; tensor var_12388 = const()[name = string("op_12388"), val = tensor([1, 8, 256])]; tensor var_12379 = transpose(perm = var_12378, x = var_12373)[name = string("transpose_126")]; tensor x_641 = reshape(shape = var_12388, x = var_12379)[name = string("x_641")]; int32 var_12394 = const()[name = string("op_12394"), val = int32(-1)]; fp16 const_366_promoted_to_fp16 = const()[name = string("const_366_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_12400_cast_fp16 = mul(x = x_641, y = const_366_promoted_to_fp16)[name = string("op_12400_cast_fp16")]; bool input_563_interleave_0 = const()[name = string("input_563_interleave_0"), val = bool(false)]; tensor input_563_cast_fp16 = concat(axis = var_12394, interleave = input_563_interleave_0, values = (x_641, var_12400_cast_fp16))[name = string("input_563_cast_fp16")]; tensor normed_613_axes_0 = const()[name = string("normed_613_axes_0"), val = tensor([-1])]; fp16 var_12392_to_fp16 = const()[name = string("op_12392_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_613_cast_fp16 = layer_norm(axes = normed_613_axes_0, epsilon = var_12392_to_fp16, x = input_563_cast_fp16)[name = string("normed_613_cast_fp16")]; tensor var_12405_split_sizes_0 = const()[name = string("op_12405_split_sizes_0"), val = tensor([256, 256])]; int32 var_12405_axis_0 = const()[name = string("op_12405_axis_0"), val = int32(-1)]; tensor var_12405_cast_fp16_0, tensor var_12405_cast_fp16_1 = split(axis = var_12405_axis_0, split_sizes = var_12405_split_sizes_0, x = normed_613_cast_fp16)[name = string("op_12405_cast_fp16")]; tensor var_12408_cast_fp16 = mul(x = var_12405_cast_fp16_0, y = const_307_to_fp16)[name = string("op_12408_cast_fp16")]; tensor var_12414 = const()[name = string("op_12414"), val = tensor([1, 8, 1, 256])]; tensor q_135 = reshape(shape = var_12414, x = var_12408_cast_fp16)[name = string("q_135")]; tensor var_12416 = mul(x = q_135, y = cos_1)[name = string("op_12416")]; tensor var_12417_split_sizes_0 = const()[name = string("op_12417_split_sizes_0"), val = tensor([128, 128])]; int32 var_12417_axis_0 = const()[name = string("op_12417_axis_0"), val = int32(-1)]; tensor var_12417_0, tensor var_12417_1 = split(axis = var_12417_axis_0, split_sizes = var_12417_split_sizes_0, x = q_135)[name = string("op_12417")]; fp16 const_368_promoted = const()[name = string("const_368_promoted"), val = fp16(-0x1p+0)]; tensor var_12419 = mul(x = var_12417_1, y = const_368_promoted)[name = string("op_12419")]; int32 var_12421 = const()[name = string("op_12421"), val = int32(-1)]; bool var_12422_interleave_0 = const()[name = string("op_12422_interleave_0"), val = bool(false)]; tensor var_12422 = concat(axis = var_12421, interleave = var_12422_interleave_0, values = (var_12419, var_12417_0))[name = string("op_12422")]; tensor var_12423 = mul(x = var_12422, y = sin_1)[name = string("op_12423")]; tensor q_137 = add(x = var_12416, y = var_12423)[name = string("q_137")]; bool var_12447_transpose_x_0 = const()[name = string("op_12447_transpose_x_0"), val = bool(false)]; bool var_12447_transpose_y_0 = const()[name = string("op_12447_transpose_y_0"), val = bool(false)]; tensor var_12447_cast_fp16 = matmul(transpose_x = var_12447_transpose_x_0, transpose_y = var_12447_transpose_y_0, x = q_137, y = transpose_153_cast_fp16)[name = string("op_12447_cast_fp16")]; tensor attn_weights_105_cast_fp16 = add(x = var_12447_cast_fp16, y = causal_mask)[name = string("attn_weights_105_cast_fp16")]; int32 var_12457 = const()[name = string("op_12457"), val = int32(-1)]; tensor var_12459_cast_fp16 = softmax(axis = var_12457, x = attn_weights_105_cast_fp16)[name = string("op_12459_cast_fp16")]; bool var_12475_transpose_x_0 = const()[name = string("op_12475_transpose_x_0"), val = bool(false)]; bool var_12475_transpose_y_0 = const()[name = string("op_12475_transpose_y_0"), val = bool(false)]; tensor var_12475_cast_fp16 = matmul(transpose_x = var_12475_transpose_x_0, transpose_y = var_12475_transpose_y_0, x = var_12459_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_12475_cast_fp16")]; tensor var_12485 = const()[name = string("op_12485"), val = tensor([0, 2, 1, 3])]; tensor var_12492 = const()[name = string("op_12492"), val = tensor([1, 1, -1])]; tensor var_12486 = transpose(perm = var_12485, x = var_12475_cast_fp16)[name = string("transpose_125")]; tensor attn_output_105 = reshape(shape = var_12492, x = var_12486)[name = string("attn_output_105")]; tensor var_12497 = const()[name = string("op_12497"), val = tensor([0, 2, 1])]; tensor squeeze_17_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2282118848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2283691776))))[name = string("squeeze_17_palettized")]; string var_12513_pad_type_0 = const()[name = string("op_12513_pad_type_0"), val = string("valid")]; int32 var_12513_groups_0 = const()[name = string("op_12513_groups_0"), val = int32(1)]; tensor var_12513_strides_0 = const()[name = string("op_12513_strides_0"), val = tensor([1])]; tensor var_12513_pad_0 = const()[name = string("op_12513_pad_0"), val = tensor([0, 0])]; tensor var_12513_dilations_0 = const()[name = string("op_12513_dilations_0"), val = tensor([1])]; tensor var_12498 = transpose(perm = var_12497, x = attn_output_105)[name = string("transpose_124")]; tensor var_12513 = conv(dilations = var_12513_dilations_0, groups = var_12513_groups_0, pad = var_12513_pad_0, pad_type = var_12513_pad_type_0, strides = var_12513_strides_0, weight = squeeze_17_palettized, x = var_12498)[name = string("op_12513")]; tensor var_12517 = const()[name = string("op_12517"), val = tensor([0, 2, 1])]; int32 var_12523 = const()[name = string("op_12523"), val = int32(-1)]; fp16 const_369_promoted_to_fp16 = const()[name = string("const_369_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_645 = transpose(perm = var_12517, x = var_12513)[name = string("transpose_123")]; tensor var_12529_cast_fp16 = mul(x = x_645, y = const_369_promoted_to_fp16)[name = string("op_12529_cast_fp16")]; bool input_567_interleave_0 = const()[name = string("input_567_interleave_0"), val = bool(false)]; tensor input_567_cast_fp16 = concat(axis = var_12523, interleave = input_567_interleave_0, values = (x_645, var_12529_cast_fp16))[name = string("input_567_cast_fp16")]; tensor normed_617_axes_0 = const()[name = string("normed_617_axes_0"), val = tensor([-1])]; fp16 var_12521_to_fp16 = const()[name = string("op_12521_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_617_cast_fp16 = layer_norm(axes = normed_617_axes_0, epsilon = var_12521_to_fp16, x = input_567_cast_fp16)[name = string("normed_617_cast_fp16")]; tensor var_12534_split_sizes_0 = const()[name = string("op_12534_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_12534_axis_0 = const()[name = string("op_12534_axis_0"), val = int32(-1)]; tensor var_12534_cast_fp16_0, tensor var_12534_cast_fp16_1 = split(axis = var_12534_axis_0, split_sizes = var_12534_split_sizes_0, x = normed_617_cast_fp16)[name = string("op_12534_cast_fp16")]; tensor const_370_to_fp16 = const()[name = string("const_370_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2283693376)))]; tensor var_12537_cast_fp16 = mul(x = var_12534_cast_fp16_0, y = const_370_to_fp16)[name = string("op_12537_cast_fp16")]; tensor x_649_cast_fp16 = add(x = x_637_cast_fp16, y = var_12537_cast_fp16)[name = string("x_649_cast_fp16")]; int32 var_12544 = const()[name = string("op_12544"), val = int32(-1)]; fp16 const_371_promoted_to_fp16 = const()[name = string("const_371_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_12550_cast_fp16 = mul(x = x_649_cast_fp16, y = const_371_promoted_to_fp16)[name = string("op_12550_cast_fp16")]; bool input_569_interleave_0 = const()[name = string("input_569_interleave_0"), val = bool(false)]; tensor input_569_cast_fp16 = concat(axis = var_12544, interleave = input_569_interleave_0, values = (x_649_cast_fp16, var_12550_cast_fp16))[name = string("input_569_cast_fp16")]; tensor normed_621_axes_0 = const()[name = string("normed_621_axes_0"), val = tensor([-1])]; fp16 var_12542_to_fp16 = const()[name = string("op_12542_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_621_cast_fp16 = layer_norm(axes = normed_621_axes_0, epsilon = var_12542_to_fp16, x = input_569_cast_fp16)[name = string("normed_621_cast_fp16")]; tensor var_12555_split_sizes_0 = const()[name = string("op_12555_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_12555_axis_0 = const()[name = string("op_12555_axis_0"), val = int32(-1)]; tensor var_12555_cast_fp16_0, tensor var_12555_cast_fp16_1 = split(axis = var_12555_axis_0, split_sizes = var_12555_split_sizes_0, x = normed_621_cast_fp16)[name = string("op_12555_cast_fp16")]; tensor const_372_to_fp16 = const()[name = string("const_372_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2283696512)))]; tensor var_12558_cast_fp16 = mul(x = var_12555_cast_fp16_0, y = const_372_to_fp16)[name = string("op_12558_cast_fp16")]; tensor var_12571 = const()[name = string("op_12571"), val = tensor([0, 2, 1])]; tensor input_571_axes_0 = const()[name = string("input_571_axes_0"), val = tensor([2])]; tensor var_12572 = transpose(perm = var_12571, x = var_12558_cast_fp16)[name = string("transpose_122")]; tensor input_571 = expand_dims(axes = input_571_axes_0, x = var_12572)[name = string("input_571")]; string gate_69_pad_type_0 = const()[name = string("gate_69_pad_type_0"), val = string("valid")]; tensor gate_69_strides_0 = const()[name = string("gate_69_strides_0"), val = tensor([1, 1])]; tensor gate_69_pad_0 = const()[name = string("gate_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_69_dilations_0 = const()[name = string("gate_69_dilations_0"), val = tensor([1, 1])]; int32 gate_69_groups_0 = const()[name = string("gate_69_groups_0"), val = int32(1)]; tensor gate_69 = conv(dilations = gate_69_dilations_0, groups = gate_69_groups_0, pad = gate_69_pad_0, pad_type = gate_69_pad_type_0, strides = gate_69_strides_0, weight = layers_17_mlp_gate_proj_weight_palettized, x = input_571)[name = string("gate_69")]; string up_35_pad_type_0 = const()[name = string("up_35_pad_type_0"), val = string("valid")]; tensor up_35_strides_0 = const()[name = string("up_35_strides_0"), val = tensor([1, 1])]; tensor up_35_pad_0 = const()[name = string("up_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_35_dilations_0 = const()[name = string("up_35_dilations_0"), val = tensor([1, 1])]; int32 up_35_groups_0 = const()[name = string("up_35_groups_0"), val = int32(1)]; tensor up_35 = conv(dilations = up_35_dilations_0, groups = up_35_groups_0, pad = up_35_pad_0, pad_type = up_35_pad_type_0, strides = up_35_strides_0, weight = layers_17_mlp_up_proj_weight_palettized, x = input_571)[name = string("up_35")]; string gate_71_mode_0 = const()[name = string("gate_71_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_71 = gelu(mode = gate_71_mode_0, x = gate_69)[name = string("gate_71")]; tensor input_573 = mul(x = gate_71, y = up_35)[name = string("input_573")]; string mlp_out_35_pad_type_0 = const()[name = string("mlp_out_35_pad_type_0"), val = string("valid")]; tensor mlp_out_35_strides_0 = const()[name = string("mlp_out_35_strides_0"), val = tensor([1, 1])]; tensor mlp_out_35_pad_0 = const()[name = string("mlp_out_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_35_dilations_0 = const()[name = string("mlp_out_35_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_35_groups_0 = const()[name = string("mlp_out_35_groups_0"), val = int32(1)]; tensor mlp_out_35 = conv(dilations = mlp_out_35_dilations_0, groups = mlp_out_35_groups_0, pad = mlp_out_35_pad_0, pad_type = mlp_out_35_pad_type_0, strides = mlp_out_35_strides_0, weight = layers_17_mlp_down_proj_weight_palettized, x = input_573)[name = string("mlp_out_35")]; tensor var_12612_axes_0 = const()[name = string("op_12612_axes_0"), val = tensor([2])]; tensor var_12612 = squeeze(axes = var_12612_axes_0, x = mlp_out_35)[name = string("op_12612")]; tensor var_12616 = const()[name = string("op_12616"), val = tensor([0, 2, 1])]; int32 var_12622 = const()[name = string("op_12622"), val = int32(-1)]; fp16 const_373_promoted_to_fp16 = const()[name = string("const_373_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_653 = transpose(perm = var_12616, x = var_12612)[name = string("transpose_121")]; tensor var_12628_cast_fp16 = mul(x = x_653, y = const_373_promoted_to_fp16)[name = string("op_12628_cast_fp16")]; bool input_575_interleave_0 = const()[name = string("input_575_interleave_0"), val = bool(false)]; tensor input_575_cast_fp16 = concat(axis = var_12622, interleave = input_575_interleave_0, values = (x_653, var_12628_cast_fp16))[name = string("input_575_cast_fp16")]; tensor normed_625_axes_0 = const()[name = string("normed_625_axes_0"), val = tensor([-1])]; fp16 var_12620_to_fp16 = const()[name = string("op_12620_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_625_cast_fp16 = layer_norm(axes = normed_625_axes_0, epsilon = var_12620_to_fp16, x = input_575_cast_fp16)[name = string("normed_625_cast_fp16")]; tensor var_12633_split_sizes_0 = const()[name = string("op_12633_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_12633_axis_0 = const()[name = string("op_12633_axis_0"), val = int32(-1)]; tensor var_12633_cast_fp16_0, tensor var_12633_cast_fp16_1 = split(axis = var_12633_axis_0, split_sizes = var_12633_split_sizes_0, x = normed_625_cast_fp16)[name = string("op_12633_cast_fp16")]; tensor const_374_to_fp16 = const()[name = string("const_374_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2283699648)))]; tensor var_12636_cast_fp16 = mul(x = var_12633_cast_fp16_0, y = const_374_to_fp16)[name = string("op_12636_cast_fp16")]; tensor hidden_states_217_cast_fp16 = add(x = x_649_cast_fp16, y = var_12636_cast_fp16)[name = string("hidden_states_217_cast_fp16")]; tensor per_layer_slice_35_begin_0 = const()[name = string("per_layer_slice_35_begin_0"), val = tensor([0, 0, 4352])]; tensor per_layer_slice_35_end_0 = const()[name = string("per_layer_slice_35_end_0"), val = tensor([1, 1, 4608])]; tensor per_layer_slice_35_end_mask_0 = const()[name = string("per_layer_slice_35_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_35 = slice_by_index(begin = per_layer_slice_35_begin_0, end = per_layer_slice_35_end_0, end_mask = per_layer_slice_35_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_35")]; tensor gated_69 = linear(bias = linear_1_bias_0, weight = layers_17_per_layer_input_gate_weight_palettized, x = hidden_states_217_cast_fp16)[name = string("linear_35")]; string gated_71_mode_0 = const()[name = string("gated_71_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_71 = gelu(mode = gated_71_mode_0, x = gated_69)[name = string("gated_71")]; tensor input_579 = mul(x = gated_71, y = per_layer_slice_35)[name = string("input_579")]; tensor x_657 = linear(bias = linear_2_bias_0, weight = layers_17_per_layer_projection_weight_palettized, x = input_579)[name = string("linear_36")]; int32 var_12673 = const()[name = string("op_12673"), val = int32(-1)]; fp16 const_375_promoted_to_fp16 = const()[name = string("const_375_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_12679_cast_fp16 = mul(x = x_657, y = const_375_promoted_to_fp16)[name = string("op_12679_cast_fp16")]; bool input_581_interleave_0 = const()[name = string("input_581_interleave_0"), val = bool(false)]; tensor input_581_cast_fp16 = concat(axis = var_12673, interleave = input_581_interleave_0, values = (x_657, var_12679_cast_fp16))[name = string("input_581_cast_fp16")]; tensor normed_629_axes_0 = const()[name = string("normed_629_axes_0"), val = tensor([-1])]; fp16 var_12671_to_fp16 = const()[name = string("op_12671_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_629_cast_fp16 = layer_norm(axes = normed_629_axes_0, epsilon = var_12671_to_fp16, x = input_581_cast_fp16)[name = string("normed_629_cast_fp16")]; tensor var_12684_split_sizes_0 = const()[name = string("op_12684_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_12684_axis_0 = const()[name = string("op_12684_axis_0"), val = int32(-1)]; tensor var_12684_cast_fp16_0, tensor var_12684_cast_fp16_1 = split(axis = var_12684_axis_0, split_sizes = var_12684_split_sizes_0, x = normed_629_cast_fp16)[name = string("op_12684_cast_fp16")]; tensor const_376_to_fp16 = const()[name = string("const_376_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2283702784)))]; tensor var_12687_cast_fp16 = mul(x = var_12684_cast_fp16_0, y = const_376_to_fp16)[name = string("op_12687_cast_fp16")]; tensor hidden_states_221_cast_fp16 = add(x = hidden_states_217_cast_fp16, y = var_12687_cast_fp16)[name = string("hidden_states_221_cast_fp16")]; tensor layers_17_layer_scalar_to_fp16 = const()[name = string("layers_17_layer_scalar_to_fp16"), val = tensor([0x1.5p-1])]; tensor x_661_cast_fp16 = mul(x = hidden_states_221_cast_fp16, y = layers_17_layer_scalar_to_fp16)[name = string("x_661_cast_fp16")]; int32 var_12695 = const()[name = string("op_12695"), val = int32(-1)]; fp16 const_377_promoted_to_fp16 = const()[name = string("const_377_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_12701_cast_fp16 = mul(x = x_661_cast_fp16, y = const_377_promoted_to_fp16)[name = string("op_12701_cast_fp16")]; bool input_583_interleave_0 = const()[name = string("input_583_interleave_0"), val = bool(false)]; tensor input_583_cast_fp16 = concat(axis = var_12695, interleave = input_583_interleave_0, values = (x_661_cast_fp16, var_12701_cast_fp16))[name = string("input_583_cast_fp16")]; tensor normed_633_axes_0 = const()[name = string("normed_633_axes_0"), val = tensor([-1])]; fp16 var_12693_to_fp16 = const()[name = string("op_12693_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_633_cast_fp16 = layer_norm(axes = normed_633_axes_0, epsilon = var_12693_to_fp16, x = input_583_cast_fp16)[name = string("normed_633_cast_fp16")]; tensor var_12706_split_sizes_0 = const()[name = string("op_12706_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_12706_axis_0 = const()[name = string("op_12706_axis_0"), val = int32(-1)]; tensor var_12706_cast_fp16_0, tensor var_12706_cast_fp16_1 = split(axis = var_12706_axis_0, split_sizes = var_12706_split_sizes_0, x = normed_633_cast_fp16)[name = string("op_12706_cast_fp16")]; tensor const_378_to_fp16 = const()[name = string("const_378_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2283705920)))]; tensor var_12709_cast_fp16 = mul(x = var_12706_cast_fp16_0, y = const_378_to_fp16)[name = string("op_12709_cast_fp16")]; tensor var_12717 = const()[name = string("op_12717"), val = tensor([0, 2, 1])]; tensor var_12720_axes_0 = const()[name = string("op_12720_axes_0"), val = tensor([2])]; tensor var_12718_cast_fp16 = transpose(perm = var_12717, x = var_12709_cast_fp16)[name = string("transpose_120")]; tensor var_12720_cast_fp16 = expand_dims(axes = var_12720_axes_0, x = var_12718_cast_fp16)[name = string("op_12720_cast_fp16")]; string var_12736_pad_type_0 = const()[name = string("op_12736_pad_type_0"), val = string("valid")]; tensor var_12736_strides_0 = const()[name = string("op_12736_strides_0"), val = tensor([1, 1])]; tensor var_12736_pad_0 = const()[name = string("op_12736_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12736_dilations_0 = const()[name = string("op_12736_dilations_0"), val = tensor([1, 1])]; int32 var_12736_groups_0 = const()[name = string("op_12736_groups_0"), val = int32(1)]; tensor var_12736 = conv(dilations = var_12736_dilations_0, groups = var_12736_groups_0, pad = var_12736_pad_0, pad_type = var_12736_pad_type_0, strides = var_12736_strides_0, weight = layers_18_self_attn_q_proj_weight_palettized, x = var_12720_cast_fp16)[name = string("op_12736")]; tensor var_12741 = const()[name = string("op_12741"), val = tensor([1, 8, 256, 1])]; tensor var_12742 = reshape(shape = var_12741, x = var_12736)[name = string("op_12742")]; tensor var_12747 = const()[name = string("op_12747"), val = tensor([0, 1, 3, 2])]; tensor var_12757 = const()[name = string("op_12757"), val = tensor([1, 8, 256])]; tensor var_12748 = transpose(perm = var_12747, x = var_12742)[name = string("transpose_119")]; tensor x_665 = reshape(shape = var_12757, x = var_12748)[name = string("x_665")]; int32 var_12763 = const()[name = string("op_12763"), val = int32(-1)]; fp16 const_379_promoted_to_fp16 = const()[name = string("const_379_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_12769_cast_fp16 = mul(x = x_665, y = const_379_promoted_to_fp16)[name = string("op_12769_cast_fp16")]; bool input_587_interleave_0 = const()[name = string("input_587_interleave_0"), val = bool(false)]; tensor input_587_cast_fp16 = concat(axis = var_12763, interleave = input_587_interleave_0, values = (x_665, var_12769_cast_fp16))[name = string("input_587_cast_fp16")]; tensor normed_637_axes_0 = const()[name = string("normed_637_axes_0"), val = tensor([-1])]; fp16 var_12761_to_fp16 = const()[name = string("op_12761_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_637_cast_fp16 = layer_norm(axes = normed_637_axes_0, epsilon = var_12761_to_fp16, x = input_587_cast_fp16)[name = string("normed_637_cast_fp16")]; tensor var_12774_split_sizes_0 = const()[name = string("op_12774_split_sizes_0"), val = tensor([256, 256])]; int32 var_12774_axis_0 = const()[name = string("op_12774_axis_0"), val = int32(-1)]; tensor var_12774_cast_fp16_0, tensor var_12774_cast_fp16_1 = split(axis = var_12774_axis_0, split_sizes = var_12774_split_sizes_0, x = normed_637_cast_fp16)[name = string("op_12774_cast_fp16")]; tensor var_12777_cast_fp16 = mul(x = var_12774_cast_fp16_0, y = const_307_to_fp16)[name = string("op_12777_cast_fp16")]; tensor var_12783 = const()[name = string("op_12783"), val = tensor([1, 8, 1, 256])]; tensor q_141 = reshape(shape = var_12783, x = var_12777_cast_fp16)[name = string("q_141")]; tensor var_12785 = mul(x = q_141, y = cos_1)[name = string("op_12785")]; tensor var_12786_split_sizes_0 = const()[name = string("op_12786_split_sizes_0"), val = tensor([128, 128])]; int32 var_12786_axis_0 = const()[name = string("op_12786_axis_0"), val = int32(-1)]; tensor var_12786_0, tensor var_12786_1 = split(axis = var_12786_axis_0, split_sizes = var_12786_split_sizes_0, x = q_141)[name = string("op_12786")]; fp16 const_381_promoted = const()[name = string("const_381_promoted"), val = fp16(-0x1p+0)]; tensor var_12788 = mul(x = var_12786_1, y = const_381_promoted)[name = string("op_12788")]; int32 var_12790 = const()[name = string("op_12790"), val = int32(-1)]; bool var_12791_interleave_0 = const()[name = string("op_12791_interleave_0"), val = bool(false)]; tensor var_12791 = concat(axis = var_12790, interleave = var_12791_interleave_0, values = (var_12788, var_12786_0))[name = string("op_12791")]; tensor var_12792 = mul(x = var_12791, y = sin_1)[name = string("op_12792")]; tensor q_143 = add(x = var_12785, y = var_12792)[name = string("q_143")]; bool var_12816_transpose_x_0 = const()[name = string("op_12816_transpose_x_0"), val = bool(false)]; bool var_12816_transpose_y_0 = const()[name = string("op_12816_transpose_y_0"), val = bool(false)]; tensor var_12816_cast_fp16 = matmul(transpose_x = var_12816_transpose_x_0, transpose_y = var_12816_transpose_y_0, x = q_143, y = transpose_153_cast_fp16)[name = string("op_12816_cast_fp16")]; tensor attn_weights_111_cast_fp16 = add(x = var_12816_cast_fp16, y = causal_mask)[name = string("attn_weights_111_cast_fp16")]; int32 var_12826 = const()[name = string("op_12826"), val = int32(-1)]; tensor var_12828_cast_fp16 = softmax(axis = var_12826, x = attn_weights_111_cast_fp16)[name = string("op_12828_cast_fp16")]; bool var_12844_transpose_x_0 = const()[name = string("op_12844_transpose_x_0"), val = bool(false)]; bool var_12844_transpose_y_0 = const()[name = string("op_12844_transpose_y_0"), val = bool(false)]; tensor var_12844_cast_fp16 = matmul(transpose_x = var_12844_transpose_x_0, transpose_y = var_12844_transpose_y_0, x = var_12828_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_12844_cast_fp16")]; tensor var_12854 = const()[name = string("op_12854"), val = tensor([0, 2, 1, 3])]; tensor var_12861 = const()[name = string("op_12861"), val = tensor([1, 1, -1])]; tensor var_12855 = transpose(perm = var_12854, x = var_12844_cast_fp16)[name = string("transpose_118")]; tensor attn_output_111 = reshape(shape = var_12861, x = var_12855)[name = string("attn_output_111")]; tensor var_12866 = const()[name = string("op_12866"), val = tensor([0, 2, 1])]; tensor squeeze_18_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2283709056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2285281984))))[name = string("squeeze_18_palettized")]; string var_12882_pad_type_0 = const()[name = string("op_12882_pad_type_0"), val = string("valid")]; int32 var_12882_groups_0 = const()[name = string("op_12882_groups_0"), val = int32(1)]; tensor var_12882_strides_0 = const()[name = string("op_12882_strides_0"), val = tensor([1])]; tensor var_12882_pad_0 = const()[name = string("op_12882_pad_0"), val = tensor([0, 0])]; tensor var_12882_dilations_0 = const()[name = string("op_12882_dilations_0"), val = tensor([1])]; tensor var_12867 = transpose(perm = var_12866, x = attn_output_111)[name = string("transpose_117")]; tensor var_12882 = conv(dilations = var_12882_dilations_0, groups = var_12882_groups_0, pad = var_12882_pad_0, pad_type = var_12882_pad_type_0, strides = var_12882_strides_0, weight = squeeze_18_palettized, x = var_12867)[name = string("op_12882")]; tensor var_12886 = const()[name = string("op_12886"), val = tensor([0, 2, 1])]; int32 var_12892 = const()[name = string("op_12892"), val = int32(-1)]; fp16 const_382_promoted_to_fp16 = const()[name = string("const_382_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_669 = transpose(perm = var_12886, x = var_12882)[name = string("transpose_116")]; tensor var_12898_cast_fp16 = mul(x = x_669, y = const_382_promoted_to_fp16)[name = string("op_12898_cast_fp16")]; bool input_591_interleave_0 = const()[name = string("input_591_interleave_0"), val = bool(false)]; tensor input_591_cast_fp16 = concat(axis = var_12892, interleave = input_591_interleave_0, values = (x_669, var_12898_cast_fp16))[name = string("input_591_cast_fp16")]; tensor normed_641_axes_0 = const()[name = string("normed_641_axes_0"), val = tensor([-1])]; fp16 var_12890_to_fp16 = const()[name = string("op_12890_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_641_cast_fp16 = layer_norm(axes = normed_641_axes_0, epsilon = var_12890_to_fp16, x = input_591_cast_fp16)[name = string("normed_641_cast_fp16")]; tensor var_12903_split_sizes_0 = const()[name = string("op_12903_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_12903_axis_0 = const()[name = string("op_12903_axis_0"), val = int32(-1)]; tensor var_12903_cast_fp16_0, tensor var_12903_cast_fp16_1 = split(axis = var_12903_axis_0, split_sizes = var_12903_split_sizes_0, x = normed_641_cast_fp16)[name = string("op_12903_cast_fp16")]; tensor const_383_to_fp16 = const()[name = string("const_383_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2285283584)))]; tensor var_12906_cast_fp16 = mul(x = var_12903_cast_fp16_0, y = const_383_to_fp16)[name = string("op_12906_cast_fp16")]; tensor x_673_cast_fp16 = add(x = x_661_cast_fp16, y = var_12906_cast_fp16)[name = string("x_673_cast_fp16")]; int32 var_12913 = const()[name = string("op_12913"), val = int32(-1)]; fp16 const_384_promoted_to_fp16 = const()[name = string("const_384_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_12919_cast_fp16 = mul(x = x_673_cast_fp16, y = const_384_promoted_to_fp16)[name = string("op_12919_cast_fp16")]; bool input_593_interleave_0 = const()[name = string("input_593_interleave_0"), val = bool(false)]; tensor input_593_cast_fp16 = concat(axis = var_12913, interleave = input_593_interleave_0, values = (x_673_cast_fp16, var_12919_cast_fp16))[name = string("input_593_cast_fp16")]; tensor normed_645_axes_0 = const()[name = string("normed_645_axes_0"), val = tensor([-1])]; fp16 var_12911_to_fp16 = const()[name = string("op_12911_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_645_cast_fp16 = layer_norm(axes = normed_645_axes_0, epsilon = var_12911_to_fp16, x = input_593_cast_fp16)[name = string("normed_645_cast_fp16")]; tensor var_12924_split_sizes_0 = const()[name = string("op_12924_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_12924_axis_0 = const()[name = string("op_12924_axis_0"), val = int32(-1)]; tensor var_12924_cast_fp16_0, tensor var_12924_cast_fp16_1 = split(axis = var_12924_axis_0, split_sizes = var_12924_split_sizes_0, x = normed_645_cast_fp16)[name = string("op_12924_cast_fp16")]; tensor const_385_to_fp16 = const()[name = string("const_385_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2285286720)))]; tensor var_12927_cast_fp16 = mul(x = var_12924_cast_fp16_0, y = const_385_to_fp16)[name = string("op_12927_cast_fp16")]; tensor var_12940 = const()[name = string("op_12940"), val = tensor([0, 2, 1])]; tensor input_595_axes_0 = const()[name = string("input_595_axes_0"), val = tensor([2])]; tensor var_12941 = transpose(perm = var_12940, x = var_12927_cast_fp16)[name = string("transpose_115")]; tensor input_595 = expand_dims(axes = input_595_axes_0, x = var_12941)[name = string("input_595")]; string gate_73_pad_type_0 = const()[name = string("gate_73_pad_type_0"), val = string("valid")]; tensor gate_73_strides_0 = const()[name = string("gate_73_strides_0"), val = tensor([1, 1])]; tensor gate_73_pad_0 = const()[name = string("gate_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_73_dilations_0 = const()[name = string("gate_73_dilations_0"), val = tensor([1, 1])]; int32 gate_73_groups_0 = const()[name = string("gate_73_groups_0"), val = int32(1)]; tensor gate_73 = conv(dilations = gate_73_dilations_0, groups = gate_73_groups_0, pad = gate_73_pad_0, pad_type = gate_73_pad_type_0, strides = gate_73_strides_0, weight = layers_18_mlp_gate_proj_weight_palettized, x = input_595)[name = string("gate_73")]; string up_37_pad_type_0 = const()[name = string("up_37_pad_type_0"), val = string("valid")]; tensor up_37_strides_0 = const()[name = string("up_37_strides_0"), val = tensor([1, 1])]; tensor up_37_pad_0 = const()[name = string("up_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_37_dilations_0 = const()[name = string("up_37_dilations_0"), val = tensor([1, 1])]; int32 up_37_groups_0 = const()[name = string("up_37_groups_0"), val = int32(1)]; tensor up_37 = conv(dilations = up_37_dilations_0, groups = up_37_groups_0, pad = up_37_pad_0, pad_type = up_37_pad_type_0, strides = up_37_strides_0, weight = layers_18_mlp_up_proj_weight_palettized, x = input_595)[name = string("up_37")]; string gate_75_mode_0 = const()[name = string("gate_75_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_75 = gelu(mode = gate_75_mode_0, x = gate_73)[name = string("gate_75")]; tensor input_597 = mul(x = gate_75, y = up_37)[name = string("input_597")]; string mlp_out_37_pad_type_0 = const()[name = string("mlp_out_37_pad_type_0"), val = string("valid")]; tensor mlp_out_37_strides_0 = const()[name = string("mlp_out_37_strides_0"), val = tensor([1, 1])]; tensor mlp_out_37_pad_0 = const()[name = string("mlp_out_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_37_dilations_0 = const()[name = string("mlp_out_37_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_37_groups_0 = const()[name = string("mlp_out_37_groups_0"), val = int32(1)]; tensor mlp_out_37 = conv(dilations = mlp_out_37_dilations_0, groups = mlp_out_37_groups_0, pad = mlp_out_37_pad_0, pad_type = mlp_out_37_pad_type_0, strides = mlp_out_37_strides_0, weight = layers_18_mlp_down_proj_weight_palettized, x = input_597)[name = string("mlp_out_37")]; tensor var_12981_axes_0 = const()[name = string("op_12981_axes_0"), val = tensor([2])]; tensor var_12981 = squeeze(axes = var_12981_axes_0, x = mlp_out_37)[name = string("op_12981")]; tensor var_12985 = const()[name = string("op_12985"), val = tensor([0, 2, 1])]; int32 var_12991 = const()[name = string("op_12991"), val = int32(-1)]; fp16 const_386_promoted_to_fp16 = const()[name = string("const_386_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_677 = transpose(perm = var_12985, x = var_12981)[name = string("transpose_114")]; tensor var_12997_cast_fp16 = mul(x = x_677, y = const_386_promoted_to_fp16)[name = string("op_12997_cast_fp16")]; bool input_599_interleave_0 = const()[name = string("input_599_interleave_0"), val = bool(false)]; tensor input_599_cast_fp16 = concat(axis = var_12991, interleave = input_599_interleave_0, values = (x_677, var_12997_cast_fp16))[name = string("input_599_cast_fp16")]; tensor normed_649_axes_0 = const()[name = string("normed_649_axes_0"), val = tensor([-1])]; fp16 var_12989_to_fp16 = const()[name = string("op_12989_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_649_cast_fp16 = layer_norm(axes = normed_649_axes_0, epsilon = var_12989_to_fp16, x = input_599_cast_fp16)[name = string("normed_649_cast_fp16")]; tensor var_13002_split_sizes_0 = const()[name = string("op_13002_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_13002_axis_0 = const()[name = string("op_13002_axis_0"), val = int32(-1)]; tensor var_13002_cast_fp16_0, tensor var_13002_cast_fp16_1 = split(axis = var_13002_axis_0, split_sizes = var_13002_split_sizes_0, x = normed_649_cast_fp16)[name = string("op_13002_cast_fp16")]; tensor const_387_to_fp16 = const()[name = string("const_387_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2285289856)))]; tensor var_13005_cast_fp16 = mul(x = var_13002_cast_fp16_0, y = const_387_to_fp16)[name = string("op_13005_cast_fp16")]; tensor hidden_states_229_cast_fp16 = add(x = x_673_cast_fp16, y = var_13005_cast_fp16)[name = string("hidden_states_229_cast_fp16")]; tensor per_layer_slice_37_begin_0 = const()[name = string("per_layer_slice_37_begin_0"), val = tensor([0, 0, 4608])]; tensor per_layer_slice_37_end_0 = const()[name = string("per_layer_slice_37_end_0"), val = tensor([1, 1, 4864])]; tensor per_layer_slice_37_end_mask_0 = const()[name = string("per_layer_slice_37_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_37 = slice_by_index(begin = per_layer_slice_37_begin_0, end = per_layer_slice_37_end_0, end_mask = per_layer_slice_37_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_37")]; tensor gated_73 = linear(bias = linear_1_bias_0, weight = layers_18_per_layer_input_gate_weight_palettized, x = hidden_states_229_cast_fp16)[name = string("linear_37")]; string gated_75_mode_0 = const()[name = string("gated_75_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_75 = gelu(mode = gated_75_mode_0, x = gated_73)[name = string("gated_75")]; tensor input_603 = mul(x = gated_75, y = per_layer_slice_37)[name = string("input_603")]; tensor x_681 = linear(bias = linear_2_bias_0, weight = layers_18_per_layer_projection_weight_palettized, x = input_603)[name = string("linear_38")]; int32 var_13042 = const()[name = string("op_13042"), val = int32(-1)]; fp16 const_388_promoted_to_fp16 = const()[name = string("const_388_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_13048_cast_fp16 = mul(x = x_681, y = const_388_promoted_to_fp16)[name = string("op_13048_cast_fp16")]; bool input_605_interleave_0 = const()[name = string("input_605_interleave_0"), val = bool(false)]; tensor input_605_cast_fp16 = concat(axis = var_13042, interleave = input_605_interleave_0, values = (x_681, var_13048_cast_fp16))[name = string("input_605_cast_fp16")]; tensor normed_653_axes_0 = const()[name = string("normed_653_axes_0"), val = tensor([-1])]; fp16 var_13040_to_fp16 = const()[name = string("op_13040_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_653_cast_fp16 = layer_norm(axes = normed_653_axes_0, epsilon = var_13040_to_fp16, x = input_605_cast_fp16)[name = string("normed_653_cast_fp16")]; tensor var_13053_split_sizes_0 = const()[name = string("op_13053_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_13053_axis_0 = const()[name = string("op_13053_axis_0"), val = int32(-1)]; tensor var_13053_cast_fp16_0, tensor var_13053_cast_fp16_1 = split(axis = var_13053_axis_0, split_sizes = var_13053_split_sizes_0, x = normed_653_cast_fp16)[name = string("op_13053_cast_fp16")]; tensor const_389_to_fp16 = const()[name = string("const_389_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2285292992)))]; tensor var_13056_cast_fp16 = mul(x = var_13053_cast_fp16_0, y = const_389_to_fp16)[name = string("op_13056_cast_fp16")]; tensor hidden_states_233_cast_fp16 = add(x = hidden_states_229_cast_fp16, y = var_13056_cast_fp16)[name = string("hidden_states_233_cast_fp16")]; tensor layers_18_layer_scalar_to_fp16 = const()[name = string("layers_18_layer_scalar_to_fp16"), val = tensor([0x1.34p-1])]; tensor x_685_cast_fp16 = mul(x = hidden_states_233_cast_fp16, y = layers_18_layer_scalar_to_fp16)[name = string("x_685_cast_fp16")]; int32 var_13064 = const()[name = string("op_13064"), val = int32(-1)]; fp16 const_390_promoted_to_fp16 = const()[name = string("const_390_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_13070_cast_fp16 = mul(x = x_685_cast_fp16, y = const_390_promoted_to_fp16)[name = string("op_13070_cast_fp16")]; bool input_607_interleave_0 = const()[name = string("input_607_interleave_0"), val = bool(false)]; tensor input_607_cast_fp16 = concat(axis = var_13064, interleave = input_607_interleave_0, values = (x_685_cast_fp16, var_13070_cast_fp16))[name = string("input_607_cast_fp16")]; tensor normed_657_axes_0 = const()[name = string("normed_657_axes_0"), val = tensor([-1])]; fp16 var_13062_to_fp16 = const()[name = string("op_13062_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_657_cast_fp16 = layer_norm(axes = normed_657_axes_0, epsilon = var_13062_to_fp16, x = input_607_cast_fp16)[name = string("normed_657_cast_fp16")]; tensor var_13075_split_sizes_0 = const()[name = string("op_13075_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_13075_axis_0 = const()[name = string("op_13075_axis_0"), val = int32(-1)]; tensor var_13075_cast_fp16_0, tensor var_13075_cast_fp16_1 = split(axis = var_13075_axis_0, split_sizes = var_13075_split_sizes_0, x = normed_657_cast_fp16)[name = string("op_13075_cast_fp16")]; tensor const_391_to_fp16 = const()[name = string("const_391_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2285296128)))]; tensor var_13078_cast_fp16 = mul(x = var_13075_cast_fp16_0, y = const_391_to_fp16)[name = string("op_13078_cast_fp16")]; tensor var_13086 = const()[name = string("op_13086"), val = tensor([0, 2, 1])]; tensor var_13089_axes_0 = const()[name = string("op_13089_axes_0"), val = tensor([2])]; tensor var_13087_cast_fp16 = transpose(perm = var_13086, x = var_13078_cast_fp16)[name = string("transpose_113")]; tensor var_13089_cast_fp16 = expand_dims(axes = var_13089_axes_0, x = var_13087_cast_fp16)[name = string("op_13089_cast_fp16")]; string var_13105_pad_type_0 = const()[name = string("op_13105_pad_type_0"), val = string("valid")]; tensor var_13105_strides_0 = const()[name = string("op_13105_strides_0"), val = tensor([1, 1])]; tensor var_13105_pad_0 = const()[name = string("op_13105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_13105_dilations_0 = const()[name = string("op_13105_dilations_0"), val = tensor([1, 1])]; int32 var_13105_groups_0 = const()[name = string("op_13105_groups_0"), val = int32(1)]; tensor var_13105 = conv(dilations = var_13105_dilations_0, groups = var_13105_groups_0, pad = var_13105_pad_0, pad_type = var_13105_pad_type_0, strides = var_13105_strides_0, weight = layers_19_self_attn_q_proj_weight_palettized, x = var_13089_cast_fp16)[name = string("op_13105")]; tensor var_13110 = const()[name = string("op_13110"), val = tensor([1, 8, 512, 1])]; tensor var_13111 = reshape(shape = var_13110, x = var_13105)[name = string("op_13111")]; tensor var_13116 = const()[name = string("op_13116"), val = tensor([0, 1, 3, 2])]; tensor var_13126 = const()[name = string("op_13126"), val = tensor([1, 8, 512])]; tensor var_13117 = transpose(perm = var_13116, x = var_13111)[name = string("transpose_112")]; tensor x_689 = reshape(shape = var_13126, x = var_13117)[name = string("x_689")]; int32 var_13132 = const()[name = string("op_13132"), val = int32(-1)]; fp16 const_392_promoted_to_fp16 = const()[name = string("const_392_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_13138_cast_fp16 = mul(x = x_689, y = const_392_promoted_to_fp16)[name = string("op_13138_cast_fp16")]; bool input_611_interleave_0 = const()[name = string("input_611_interleave_0"), val = bool(false)]; tensor input_611_cast_fp16 = concat(axis = var_13132, interleave = input_611_interleave_0, values = (x_689, var_13138_cast_fp16))[name = string("input_611_cast_fp16")]; tensor normed_661_axes_0 = const()[name = string("normed_661_axes_0"), val = tensor([-1])]; fp16 var_13130_to_fp16 = const()[name = string("op_13130_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_661_cast_fp16 = layer_norm(axes = normed_661_axes_0, epsilon = var_13130_to_fp16, x = input_611_cast_fp16)[name = string("normed_661_cast_fp16")]; tensor var_13143_split_sizes_0 = const()[name = string("op_13143_split_sizes_0"), val = tensor([512, 512])]; int32 var_13143_axis_0 = const()[name = string("op_13143_axis_0"), val = int32(-1)]; tensor var_13143_cast_fp16_0, tensor var_13143_cast_fp16_1 = split(axis = var_13143_axis_0, split_sizes = var_13143_split_sizes_0, x = normed_661_cast_fp16)[name = string("op_13143_cast_fp16")]; tensor var_13146_cast_fp16 = mul(x = var_13143_cast_fp16_0, y = const_325_to_fp16)[name = string("op_13146_cast_fp16")]; tensor var_13152 = const()[name = string("op_13152"), val = tensor([1, 8, 1, 512])]; tensor q_147 = reshape(shape = var_13152, x = var_13146_cast_fp16)[name = string("q_147")]; tensor var_13154 = mul(x = q_147, y = cos)[name = string("op_13154")]; tensor var_13155_split_sizes_0 = const()[name = string("op_13155_split_sizes_0"), val = tensor([256, 256])]; int32 var_13155_axis_0 = const()[name = string("op_13155_axis_0"), val = int32(-1)]; tensor var_13155_0, tensor var_13155_1 = split(axis = var_13155_axis_0, split_sizes = var_13155_split_sizes_0, x = q_147)[name = string("op_13155")]; fp16 const_394_promoted = const()[name = string("const_394_promoted"), val = fp16(-0x1p+0)]; tensor var_13157 = mul(x = var_13155_1, y = const_394_promoted)[name = string("op_13157")]; int32 var_13159 = const()[name = string("op_13159"), val = int32(-1)]; bool var_13160_interleave_0 = const()[name = string("op_13160_interleave_0"), val = bool(false)]; tensor var_13160 = concat(axis = var_13159, interleave = var_13160_interleave_0, values = (var_13157, var_13155_0))[name = string("op_13160")]; tensor var_13161 = mul(x = var_13160, y = sin)[name = string("op_13161")]; tensor q_149 = add(x = var_13154, y = var_13161)[name = string("q_149")]; bool var_13185_transpose_x_0 = const()[name = string("op_13185_transpose_x_0"), val = bool(false)]; bool var_13185_transpose_y_0 = const()[name = string("op_13185_transpose_y_0"), val = bool(false)]; tensor var_13185_cast_fp16 = matmul(transpose_x = var_13185_transpose_x_0, transpose_y = var_13185_transpose_y_0, x = q_149, y = transpose_154_cast_fp16)[name = string("op_13185_cast_fp16")]; tensor attn_weights_117_cast_fp16 = add(x = var_13185_cast_fp16, y = causal_mask)[name = string("attn_weights_117_cast_fp16")]; int32 var_13195 = const()[name = string("op_13195"), val = int32(-1)]; tensor var_13197_cast_fp16 = softmax(axis = var_13195, x = attn_weights_117_cast_fp16)[name = string("op_13197_cast_fp16")]; bool var_13213_transpose_x_0 = const()[name = string("op_13213_transpose_x_0"), val = bool(false)]; bool var_13213_transpose_y_0 = const()[name = string("op_13213_transpose_y_0"), val = bool(false)]; tensor var_13213_cast_fp16 = matmul(transpose_x = var_13213_transpose_x_0, transpose_y = var_13213_transpose_y_0, x = var_13197_cast_fp16, y = V_expanded_29_cast_fp16)[name = string("op_13213_cast_fp16")]; tensor var_13223 = const()[name = string("op_13223"), val = tensor([0, 2, 1, 3])]; tensor var_13230 = const()[name = string("op_13230"), val = tensor([1, 1, -1])]; tensor var_13224 = transpose(perm = var_13223, x = var_13213_cast_fp16)[name = string("transpose_111")]; tensor attn_output_117 = reshape(shape = var_13230, x = var_13224)[name = string("attn_output_117")]; tensor var_13235 = const()[name = string("op_13235"), val = tensor([0, 2, 1])]; tensor squeeze_19_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2285299264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2288445056))))[name = string("squeeze_19_palettized")]; string var_13251_pad_type_0 = const()[name = string("op_13251_pad_type_0"), val = string("valid")]; int32 var_13251_groups_0 = const()[name = string("op_13251_groups_0"), val = int32(1)]; tensor var_13251_strides_0 = const()[name = string("op_13251_strides_0"), val = tensor([1])]; tensor var_13251_pad_0 = const()[name = string("op_13251_pad_0"), val = tensor([0, 0])]; tensor var_13251_dilations_0 = const()[name = string("op_13251_dilations_0"), val = tensor([1])]; tensor var_13236 = transpose(perm = var_13235, x = attn_output_117)[name = string("transpose_110")]; tensor var_13251 = conv(dilations = var_13251_dilations_0, groups = var_13251_groups_0, pad = var_13251_pad_0, pad_type = var_13251_pad_type_0, strides = var_13251_strides_0, weight = squeeze_19_palettized, x = var_13236)[name = string("op_13251")]; tensor var_13255 = const()[name = string("op_13255"), val = tensor([0, 2, 1])]; int32 var_13261 = const()[name = string("op_13261"), val = int32(-1)]; fp16 const_395_promoted_to_fp16 = const()[name = string("const_395_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_693 = transpose(perm = var_13255, x = var_13251)[name = string("transpose_109")]; tensor var_13267_cast_fp16 = mul(x = x_693, y = const_395_promoted_to_fp16)[name = string("op_13267_cast_fp16")]; bool input_615_interleave_0 = const()[name = string("input_615_interleave_0"), val = bool(false)]; tensor input_615_cast_fp16 = concat(axis = var_13261, interleave = input_615_interleave_0, values = (x_693, var_13267_cast_fp16))[name = string("input_615_cast_fp16")]; tensor normed_665_axes_0 = const()[name = string("normed_665_axes_0"), val = tensor([-1])]; fp16 var_13259_to_fp16 = const()[name = string("op_13259_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_665_cast_fp16 = layer_norm(axes = normed_665_axes_0, epsilon = var_13259_to_fp16, x = input_615_cast_fp16)[name = string("normed_665_cast_fp16")]; tensor var_13272_split_sizes_0 = const()[name = string("op_13272_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_13272_axis_0 = const()[name = string("op_13272_axis_0"), val = int32(-1)]; tensor var_13272_cast_fp16_0, tensor var_13272_cast_fp16_1 = split(axis = var_13272_axis_0, split_sizes = var_13272_split_sizes_0, x = normed_665_cast_fp16)[name = string("op_13272_cast_fp16")]; tensor const_396_to_fp16 = const()[name = string("const_396_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2288446656)))]; tensor var_13275_cast_fp16 = mul(x = var_13272_cast_fp16_0, y = const_396_to_fp16)[name = string("op_13275_cast_fp16")]; tensor x_697_cast_fp16 = add(x = x_685_cast_fp16, y = var_13275_cast_fp16)[name = string("x_697_cast_fp16")]; int32 var_13282 = const()[name = string("op_13282"), val = int32(-1)]; fp16 const_397_promoted_to_fp16 = const()[name = string("const_397_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_13288_cast_fp16 = mul(x = x_697_cast_fp16, y = const_397_promoted_to_fp16)[name = string("op_13288_cast_fp16")]; bool input_617_interleave_0 = const()[name = string("input_617_interleave_0"), val = bool(false)]; tensor input_617_cast_fp16 = concat(axis = var_13282, interleave = input_617_interleave_0, values = (x_697_cast_fp16, var_13288_cast_fp16))[name = string("input_617_cast_fp16")]; tensor normed_669_axes_0 = const()[name = string("normed_669_axes_0"), val = tensor([-1])]; fp16 var_13280_to_fp16 = const()[name = string("op_13280_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_669_cast_fp16 = layer_norm(axes = normed_669_axes_0, epsilon = var_13280_to_fp16, x = input_617_cast_fp16)[name = string("normed_669_cast_fp16")]; tensor var_13293_split_sizes_0 = const()[name = string("op_13293_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_13293_axis_0 = const()[name = string("op_13293_axis_0"), val = int32(-1)]; tensor var_13293_cast_fp16_0, tensor var_13293_cast_fp16_1 = split(axis = var_13293_axis_0, split_sizes = var_13293_split_sizes_0, x = normed_669_cast_fp16)[name = string("op_13293_cast_fp16")]; tensor const_398_to_fp16 = const()[name = string("const_398_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2288449792)))]; tensor var_13296_cast_fp16 = mul(x = var_13293_cast_fp16_0, y = const_398_to_fp16)[name = string("op_13296_cast_fp16")]; tensor var_13309 = const()[name = string("op_13309"), val = tensor([0, 2, 1])]; tensor input_619_axes_0 = const()[name = string("input_619_axes_0"), val = tensor([2])]; tensor var_13310 = transpose(perm = var_13309, x = var_13296_cast_fp16)[name = string("transpose_108")]; tensor input_619 = expand_dims(axes = input_619_axes_0, x = var_13310)[name = string("input_619")]; string gate_77_pad_type_0 = const()[name = string("gate_77_pad_type_0"), val = string("valid")]; tensor gate_77_strides_0 = const()[name = string("gate_77_strides_0"), val = tensor([1, 1])]; tensor gate_77_pad_0 = const()[name = string("gate_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_77_dilations_0 = const()[name = string("gate_77_dilations_0"), val = tensor([1, 1])]; int32 gate_77_groups_0 = const()[name = string("gate_77_groups_0"), val = int32(1)]; tensor gate_77 = conv(dilations = gate_77_dilations_0, groups = gate_77_groups_0, pad = gate_77_pad_0, pad_type = gate_77_pad_type_0, strides = gate_77_strides_0, weight = layers_19_mlp_gate_proj_weight_palettized, x = input_619)[name = string("gate_77")]; string up_39_pad_type_0 = const()[name = string("up_39_pad_type_0"), val = string("valid")]; tensor up_39_strides_0 = const()[name = string("up_39_strides_0"), val = tensor([1, 1])]; tensor up_39_pad_0 = const()[name = string("up_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_39_dilations_0 = const()[name = string("up_39_dilations_0"), val = tensor([1, 1])]; int32 up_39_groups_0 = const()[name = string("up_39_groups_0"), val = int32(1)]; tensor up_39 = conv(dilations = up_39_dilations_0, groups = up_39_groups_0, pad = up_39_pad_0, pad_type = up_39_pad_type_0, strides = up_39_strides_0, weight = layers_19_mlp_up_proj_weight_palettized, x = input_619)[name = string("up_39")]; string gate_79_mode_0 = const()[name = string("gate_79_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_79 = gelu(mode = gate_79_mode_0, x = gate_77)[name = string("gate_79")]; tensor input_621 = mul(x = gate_79, y = up_39)[name = string("input_621")]; string mlp_out_39_pad_type_0 = const()[name = string("mlp_out_39_pad_type_0"), val = string("valid")]; tensor mlp_out_39_strides_0 = const()[name = string("mlp_out_39_strides_0"), val = tensor([1, 1])]; tensor mlp_out_39_pad_0 = const()[name = string("mlp_out_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_39_dilations_0 = const()[name = string("mlp_out_39_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_39_groups_0 = const()[name = string("mlp_out_39_groups_0"), val = int32(1)]; tensor mlp_out_39 = conv(dilations = mlp_out_39_dilations_0, groups = mlp_out_39_groups_0, pad = mlp_out_39_pad_0, pad_type = mlp_out_39_pad_type_0, strides = mlp_out_39_strides_0, weight = layers_19_mlp_down_proj_weight_palettized, x = input_621)[name = string("mlp_out_39")]; tensor var_13350_axes_0 = const()[name = string("op_13350_axes_0"), val = tensor([2])]; tensor var_13350 = squeeze(axes = var_13350_axes_0, x = mlp_out_39)[name = string("op_13350")]; tensor var_13354 = const()[name = string("op_13354"), val = tensor([0, 2, 1])]; int32 var_13360 = const()[name = string("op_13360"), val = int32(-1)]; fp16 const_399_promoted_to_fp16 = const()[name = string("const_399_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_701 = transpose(perm = var_13354, x = var_13350)[name = string("transpose_107")]; tensor var_13366_cast_fp16 = mul(x = x_701, y = const_399_promoted_to_fp16)[name = string("op_13366_cast_fp16")]; bool input_623_interleave_0 = const()[name = string("input_623_interleave_0"), val = bool(false)]; tensor input_623_cast_fp16 = concat(axis = var_13360, interleave = input_623_interleave_0, values = (x_701, var_13366_cast_fp16))[name = string("input_623_cast_fp16")]; tensor normed_673_axes_0 = const()[name = string("normed_673_axes_0"), val = tensor([-1])]; fp16 var_13358_to_fp16 = const()[name = string("op_13358_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_673_cast_fp16 = layer_norm(axes = normed_673_axes_0, epsilon = var_13358_to_fp16, x = input_623_cast_fp16)[name = string("normed_673_cast_fp16")]; tensor var_13371_split_sizes_0 = const()[name = string("op_13371_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_13371_axis_0 = const()[name = string("op_13371_axis_0"), val = int32(-1)]; tensor var_13371_cast_fp16_0, tensor var_13371_cast_fp16_1 = split(axis = var_13371_axis_0, split_sizes = var_13371_split_sizes_0, x = normed_673_cast_fp16)[name = string("op_13371_cast_fp16")]; tensor const_400_to_fp16 = const()[name = string("const_400_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2288452928)))]; tensor var_13374_cast_fp16 = mul(x = var_13371_cast_fp16_0, y = const_400_to_fp16)[name = string("op_13374_cast_fp16")]; tensor hidden_states_241_cast_fp16 = add(x = x_697_cast_fp16, y = var_13374_cast_fp16)[name = string("hidden_states_241_cast_fp16")]; tensor per_layer_slice_39_begin_0 = const()[name = string("per_layer_slice_39_begin_0"), val = tensor([0, 0, 4864])]; tensor per_layer_slice_39_end_0 = const()[name = string("per_layer_slice_39_end_0"), val = tensor([1, 1, 5120])]; tensor per_layer_slice_39_end_mask_0 = const()[name = string("per_layer_slice_39_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_39 = slice_by_index(begin = per_layer_slice_39_begin_0, end = per_layer_slice_39_end_0, end_mask = per_layer_slice_39_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_39")]; tensor gated_77 = linear(bias = linear_1_bias_0, weight = layers_19_per_layer_input_gate_weight_palettized, x = hidden_states_241_cast_fp16)[name = string("linear_39")]; string gated_79_mode_0 = const()[name = string("gated_79_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_79 = gelu(mode = gated_79_mode_0, x = gated_77)[name = string("gated_79")]; tensor input_627 = mul(x = gated_79, y = per_layer_slice_39)[name = string("input_627")]; tensor x_705 = linear(bias = linear_2_bias_0, weight = layers_19_per_layer_projection_weight_palettized, x = input_627)[name = string("linear_40")]; int32 var_13411 = const()[name = string("op_13411"), val = int32(-1)]; fp16 const_401_promoted_to_fp16 = const()[name = string("const_401_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_13417_cast_fp16 = mul(x = x_705, y = const_401_promoted_to_fp16)[name = string("op_13417_cast_fp16")]; bool input_629_interleave_0 = const()[name = string("input_629_interleave_0"), val = bool(false)]; tensor input_629_cast_fp16 = concat(axis = var_13411, interleave = input_629_interleave_0, values = (x_705, var_13417_cast_fp16))[name = string("input_629_cast_fp16")]; tensor normed_677_axes_0 = const()[name = string("normed_677_axes_0"), val = tensor([-1])]; fp16 var_13409_to_fp16 = const()[name = string("op_13409_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_677_cast_fp16 = layer_norm(axes = normed_677_axes_0, epsilon = var_13409_to_fp16, x = input_629_cast_fp16)[name = string("normed_677_cast_fp16")]; tensor var_13422_split_sizes_0 = const()[name = string("op_13422_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_13422_axis_0 = const()[name = string("op_13422_axis_0"), val = int32(-1)]; tensor var_13422_cast_fp16_0, tensor var_13422_cast_fp16_1 = split(axis = var_13422_axis_0, split_sizes = var_13422_split_sizes_0, x = normed_677_cast_fp16)[name = string("op_13422_cast_fp16")]; tensor const_402_to_fp16 = const()[name = string("const_402_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2288456064)))]; tensor var_13425_cast_fp16 = mul(x = var_13422_cast_fp16_0, y = const_402_to_fp16)[name = string("op_13425_cast_fp16")]; tensor hidden_states_245_cast_fp16 = add(x = hidden_states_241_cast_fp16, y = var_13425_cast_fp16)[name = string("hidden_states_245_cast_fp16")]; tensor layers_19_layer_scalar_to_fp16 = const()[name = string("layers_19_layer_scalar_to_fp16"), val = tensor([0x1.14p-1])]; tensor x_709_cast_fp16 = mul(x = hidden_states_245_cast_fp16, y = layers_19_layer_scalar_to_fp16)[name = string("x_709_cast_fp16")]; int32 var_13433 = const()[name = string("op_13433"), val = int32(-1)]; fp16 const_403_promoted_to_fp16 = const()[name = string("const_403_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_13439_cast_fp16 = mul(x = x_709_cast_fp16, y = const_403_promoted_to_fp16)[name = string("op_13439_cast_fp16")]; bool input_631_interleave_0 = const()[name = string("input_631_interleave_0"), val = bool(false)]; tensor input_631_cast_fp16 = concat(axis = var_13433, interleave = input_631_interleave_0, values = (x_709_cast_fp16, var_13439_cast_fp16))[name = string("input_631_cast_fp16")]; tensor normed_681_axes_0 = const()[name = string("normed_681_axes_0"), val = tensor([-1])]; fp16 var_13431_to_fp16 = const()[name = string("op_13431_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_681_cast_fp16 = layer_norm(axes = normed_681_axes_0, epsilon = var_13431_to_fp16, x = input_631_cast_fp16)[name = string("normed_681_cast_fp16")]; tensor var_13444_split_sizes_0 = const()[name = string("op_13444_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_13444_axis_0 = const()[name = string("op_13444_axis_0"), val = int32(-1)]; tensor var_13444_cast_fp16_0, tensor var_13444_cast_fp16_1 = split(axis = var_13444_axis_0, split_sizes = var_13444_split_sizes_0, x = normed_681_cast_fp16)[name = string("op_13444_cast_fp16")]; tensor const_404_to_fp16 = const()[name = string("const_404_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2288459200)))]; tensor var_13447_cast_fp16 = mul(x = var_13444_cast_fp16_0, y = const_404_to_fp16)[name = string("op_13447_cast_fp16")]; tensor var_13455 = const()[name = string("op_13455"), val = tensor([0, 2, 1])]; tensor var_13458_axes_0 = const()[name = string("op_13458_axes_0"), val = tensor([2])]; tensor var_13456_cast_fp16 = transpose(perm = var_13455, x = var_13447_cast_fp16)[name = string("transpose_106")]; tensor var_13458_cast_fp16 = expand_dims(axes = var_13458_axes_0, x = var_13456_cast_fp16)[name = string("op_13458_cast_fp16")]; string var_13474_pad_type_0 = const()[name = string("op_13474_pad_type_0"), val = string("valid")]; tensor var_13474_strides_0 = const()[name = string("op_13474_strides_0"), val = tensor([1, 1])]; tensor var_13474_pad_0 = const()[name = string("op_13474_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_13474_dilations_0 = const()[name = string("op_13474_dilations_0"), val = tensor([1, 1])]; int32 var_13474_groups_0 = const()[name = string("op_13474_groups_0"), val = int32(1)]; tensor var_13474 = conv(dilations = var_13474_dilations_0, groups = var_13474_groups_0, pad = var_13474_pad_0, pad_type = var_13474_pad_type_0, strides = var_13474_strides_0, weight = layers_20_self_attn_q_proj_weight_palettized, x = var_13458_cast_fp16)[name = string("op_13474")]; tensor var_13479 = const()[name = string("op_13479"), val = tensor([1, 8, 256, 1])]; tensor var_13480 = reshape(shape = var_13479, x = var_13474)[name = string("op_13480")]; tensor var_13485 = const()[name = string("op_13485"), val = tensor([0, 1, 3, 2])]; tensor var_13495 = const()[name = string("op_13495"), val = tensor([1, 8, 256])]; tensor var_13486 = transpose(perm = var_13485, x = var_13480)[name = string("transpose_105")]; tensor x_713 = reshape(shape = var_13495, x = var_13486)[name = string("x_713")]; int32 var_13501 = const()[name = string("op_13501"), val = int32(-1)]; fp16 const_405_promoted_to_fp16 = const()[name = string("const_405_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_13507_cast_fp16 = mul(x = x_713, y = const_405_promoted_to_fp16)[name = string("op_13507_cast_fp16")]; bool input_635_interleave_0 = const()[name = string("input_635_interleave_0"), val = bool(false)]; tensor input_635_cast_fp16 = concat(axis = var_13501, interleave = input_635_interleave_0, values = (x_713, var_13507_cast_fp16))[name = string("input_635_cast_fp16")]; tensor normed_685_axes_0 = const()[name = string("normed_685_axes_0"), val = tensor([-1])]; fp16 var_13499_to_fp16 = const()[name = string("op_13499_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_685_cast_fp16 = layer_norm(axes = normed_685_axes_0, epsilon = var_13499_to_fp16, x = input_635_cast_fp16)[name = string("normed_685_cast_fp16")]; tensor var_13512_split_sizes_0 = const()[name = string("op_13512_split_sizes_0"), val = tensor([256, 256])]; int32 var_13512_axis_0 = const()[name = string("op_13512_axis_0"), val = int32(-1)]; tensor var_13512_cast_fp16_0, tensor var_13512_cast_fp16_1 = split(axis = var_13512_axis_0, split_sizes = var_13512_split_sizes_0, x = normed_685_cast_fp16)[name = string("op_13512_cast_fp16")]; tensor var_13515_cast_fp16 = mul(x = var_13512_cast_fp16_0, y = const_307_to_fp16)[name = string("op_13515_cast_fp16")]; tensor var_13521 = const()[name = string("op_13521"), val = tensor([1, 8, 1, 256])]; tensor q_153 = reshape(shape = var_13521, x = var_13515_cast_fp16)[name = string("q_153")]; tensor var_13523 = mul(x = q_153, y = cos_1)[name = string("op_13523")]; tensor var_13524_split_sizes_0 = const()[name = string("op_13524_split_sizes_0"), val = tensor([128, 128])]; int32 var_13524_axis_0 = const()[name = string("op_13524_axis_0"), val = int32(-1)]; tensor var_13524_0, tensor var_13524_1 = split(axis = var_13524_axis_0, split_sizes = var_13524_split_sizes_0, x = q_153)[name = string("op_13524")]; fp16 const_407_promoted = const()[name = string("const_407_promoted"), val = fp16(-0x1p+0)]; tensor var_13526 = mul(x = var_13524_1, y = const_407_promoted)[name = string("op_13526")]; int32 var_13528 = const()[name = string("op_13528"), val = int32(-1)]; bool var_13529_interleave_0 = const()[name = string("op_13529_interleave_0"), val = bool(false)]; tensor var_13529 = concat(axis = var_13528, interleave = var_13529_interleave_0, values = (var_13526, var_13524_0))[name = string("op_13529")]; tensor var_13530 = mul(x = var_13529, y = sin_1)[name = string("op_13530")]; tensor q_155 = add(x = var_13523, y = var_13530)[name = string("q_155")]; bool var_13554_transpose_x_0 = const()[name = string("op_13554_transpose_x_0"), val = bool(false)]; bool var_13554_transpose_y_0 = const()[name = string("op_13554_transpose_y_0"), val = bool(false)]; tensor var_13554_cast_fp16 = matmul(transpose_x = var_13554_transpose_x_0, transpose_y = var_13554_transpose_y_0, x = q_155, y = transpose_153_cast_fp16)[name = string("op_13554_cast_fp16")]; tensor attn_weights_123_cast_fp16 = add(x = var_13554_cast_fp16, y = causal_mask)[name = string("attn_weights_123_cast_fp16")]; int32 var_13564 = const()[name = string("op_13564"), val = int32(-1)]; tensor var_13566_cast_fp16 = softmax(axis = var_13564, x = attn_weights_123_cast_fp16)[name = string("op_13566_cast_fp16")]; bool var_13582_transpose_x_0 = const()[name = string("op_13582_transpose_x_0"), val = bool(false)]; bool var_13582_transpose_y_0 = const()[name = string("op_13582_transpose_y_0"), val = bool(false)]; tensor var_13582_cast_fp16 = matmul(transpose_x = var_13582_transpose_x_0, transpose_y = var_13582_transpose_y_0, x = var_13566_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_13582_cast_fp16")]; tensor var_13592 = const()[name = string("op_13592"), val = tensor([0, 2, 1, 3])]; tensor var_13599 = const()[name = string("op_13599"), val = tensor([1, 1, -1])]; tensor var_13593 = transpose(perm = var_13592, x = var_13582_cast_fp16)[name = string("transpose_104")]; tensor attn_output_123 = reshape(shape = var_13599, x = var_13593)[name = string("attn_output_123")]; tensor var_13604 = const()[name = string("op_13604"), val = tensor([0, 2, 1])]; tensor squeeze_20_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2288462336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2290035264))))[name = string("squeeze_20_palettized")]; string var_13620_pad_type_0 = const()[name = string("op_13620_pad_type_0"), val = string("valid")]; int32 var_13620_groups_0 = const()[name = string("op_13620_groups_0"), val = int32(1)]; tensor var_13620_strides_0 = const()[name = string("op_13620_strides_0"), val = tensor([1])]; tensor var_13620_pad_0 = const()[name = string("op_13620_pad_0"), val = tensor([0, 0])]; tensor var_13620_dilations_0 = const()[name = string("op_13620_dilations_0"), val = tensor([1])]; tensor var_13605 = transpose(perm = var_13604, x = attn_output_123)[name = string("transpose_103")]; tensor var_13620 = conv(dilations = var_13620_dilations_0, groups = var_13620_groups_0, pad = var_13620_pad_0, pad_type = var_13620_pad_type_0, strides = var_13620_strides_0, weight = squeeze_20_palettized, x = var_13605)[name = string("op_13620")]; tensor var_13624 = const()[name = string("op_13624"), val = tensor([0, 2, 1])]; int32 var_13630 = const()[name = string("op_13630"), val = int32(-1)]; fp16 const_408_promoted_to_fp16 = const()[name = string("const_408_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_717 = transpose(perm = var_13624, x = var_13620)[name = string("transpose_102")]; tensor var_13636_cast_fp16 = mul(x = x_717, y = const_408_promoted_to_fp16)[name = string("op_13636_cast_fp16")]; bool input_639_interleave_0 = const()[name = string("input_639_interleave_0"), val = bool(false)]; tensor input_639_cast_fp16 = concat(axis = var_13630, interleave = input_639_interleave_0, values = (x_717, var_13636_cast_fp16))[name = string("input_639_cast_fp16")]; tensor normed_689_axes_0 = const()[name = string("normed_689_axes_0"), val = tensor([-1])]; fp16 var_13628_to_fp16 = const()[name = string("op_13628_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_689_cast_fp16 = layer_norm(axes = normed_689_axes_0, epsilon = var_13628_to_fp16, x = input_639_cast_fp16)[name = string("normed_689_cast_fp16")]; tensor var_13641_split_sizes_0 = const()[name = string("op_13641_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_13641_axis_0 = const()[name = string("op_13641_axis_0"), val = int32(-1)]; tensor var_13641_cast_fp16_0, tensor var_13641_cast_fp16_1 = split(axis = var_13641_axis_0, split_sizes = var_13641_split_sizes_0, x = normed_689_cast_fp16)[name = string("op_13641_cast_fp16")]; tensor const_409_to_fp16 = const()[name = string("const_409_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2290036864)))]; tensor var_13644_cast_fp16 = mul(x = var_13641_cast_fp16_0, y = const_409_to_fp16)[name = string("op_13644_cast_fp16")]; tensor x_721_cast_fp16 = add(x = x_709_cast_fp16, y = var_13644_cast_fp16)[name = string("x_721_cast_fp16")]; int32 var_13651 = const()[name = string("op_13651"), val = int32(-1)]; fp16 const_410_promoted_to_fp16 = const()[name = string("const_410_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_13657_cast_fp16 = mul(x = x_721_cast_fp16, y = const_410_promoted_to_fp16)[name = string("op_13657_cast_fp16")]; bool input_641_interleave_0 = const()[name = string("input_641_interleave_0"), val = bool(false)]; tensor input_641_cast_fp16 = concat(axis = var_13651, interleave = input_641_interleave_0, values = (x_721_cast_fp16, var_13657_cast_fp16))[name = string("input_641_cast_fp16")]; tensor normed_693_axes_0 = const()[name = string("normed_693_axes_0"), val = tensor([-1])]; fp16 var_13649_to_fp16 = const()[name = string("op_13649_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_693_cast_fp16 = layer_norm(axes = normed_693_axes_0, epsilon = var_13649_to_fp16, x = input_641_cast_fp16)[name = string("normed_693_cast_fp16")]; tensor var_13662_split_sizes_0 = const()[name = string("op_13662_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_13662_axis_0 = const()[name = string("op_13662_axis_0"), val = int32(-1)]; tensor var_13662_cast_fp16_0, tensor var_13662_cast_fp16_1 = split(axis = var_13662_axis_0, split_sizes = var_13662_split_sizes_0, x = normed_693_cast_fp16)[name = string("op_13662_cast_fp16")]; tensor const_411_to_fp16 = const()[name = string("const_411_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2290040000)))]; tensor var_13665_cast_fp16 = mul(x = var_13662_cast_fp16_0, y = const_411_to_fp16)[name = string("op_13665_cast_fp16")]; tensor var_13678 = const()[name = string("op_13678"), val = tensor([0, 2, 1])]; tensor input_643_axes_0 = const()[name = string("input_643_axes_0"), val = tensor([2])]; tensor var_13679 = transpose(perm = var_13678, x = var_13665_cast_fp16)[name = string("transpose_101")]; tensor input_643 = expand_dims(axes = input_643_axes_0, x = var_13679)[name = string("input_643")]; string gate_81_pad_type_0 = const()[name = string("gate_81_pad_type_0"), val = string("valid")]; tensor gate_81_strides_0 = const()[name = string("gate_81_strides_0"), val = tensor([1, 1])]; tensor gate_81_pad_0 = const()[name = string("gate_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_81_dilations_0 = const()[name = string("gate_81_dilations_0"), val = tensor([1, 1])]; int32 gate_81_groups_0 = const()[name = string("gate_81_groups_0"), val = int32(1)]; tensor gate_81 = conv(dilations = gate_81_dilations_0, groups = gate_81_groups_0, pad = gate_81_pad_0, pad_type = gate_81_pad_type_0, strides = gate_81_strides_0, weight = layers_20_mlp_gate_proj_weight_palettized, x = input_643)[name = string("gate_81")]; string up_41_pad_type_0 = const()[name = string("up_41_pad_type_0"), val = string("valid")]; tensor up_41_strides_0 = const()[name = string("up_41_strides_0"), val = tensor([1, 1])]; tensor up_41_pad_0 = const()[name = string("up_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_41_dilations_0 = const()[name = string("up_41_dilations_0"), val = tensor([1, 1])]; int32 up_41_groups_0 = const()[name = string("up_41_groups_0"), val = int32(1)]; tensor up_41 = conv(dilations = up_41_dilations_0, groups = up_41_groups_0, pad = up_41_pad_0, pad_type = up_41_pad_type_0, strides = up_41_strides_0, weight = layers_20_mlp_up_proj_weight_palettized, x = input_643)[name = string("up_41")]; string gate_83_mode_0 = const()[name = string("gate_83_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_83 = gelu(mode = gate_83_mode_0, x = gate_81)[name = string("gate_83")]; tensor input_645 = mul(x = gate_83, y = up_41)[name = string("input_645")]; string mlp_out_41_pad_type_0 = const()[name = string("mlp_out_41_pad_type_0"), val = string("valid")]; tensor mlp_out_41_strides_0 = const()[name = string("mlp_out_41_strides_0"), val = tensor([1, 1])]; tensor mlp_out_41_pad_0 = const()[name = string("mlp_out_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_41_dilations_0 = const()[name = string("mlp_out_41_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_41_groups_0 = const()[name = string("mlp_out_41_groups_0"), val = int32(1)]; tensor mlp_out_41 = conv(dilations = mlp_out_41_dilations_0, groups = mlp_out_41_groups_0, pad = mlp_out_41_pad_0, pad_type = mlp_out_41_pad_type_0, strides = mlp_out_41_strides_0, weight = layers_20_mlp_down_proj_weight_palettized, x = input_645)[name = string("mlp_out_41")]; tensor var_13719_axes_0 = const()[name = string("op_13719_axes_0"), val = tensor([2])]; tensor var_13719 = squeeze(axes = var_13719_axes_0, x = mlp_out_41)[name = string("op_13719")]; tensor var_13723 = const()[name = string("op_13723"), val = tensor([0, 2, 1])]; int32 var_13729 = const()[name = string("op_13729"), val = int32(-1)]; fp16 const_412_promoted_to_fp16 = const()[name = string("const_412_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_725 = transpose(perm = var_13723, x = var_13719)[name = string("transpose_100")]; tensor var_13735_cast_fp16 = mul(x = x_725, y = const_412_promoted_to_fp16)[name = string("op_13735_cast_fp16")]; bool input_647_interleave_0 = const()[name = string("input_647_interleave_0"), val = bool(false)]; tensor input_647_cast_fp16 = concat(axis = var_13729, interleave = input_647_interleave_0, values = (x_725, var_13735_cast_fp16))[name = string("input_647_cast_fp16")]; tensor normed_697_axes_0 = const()[name = string("normed_697_axes_0"), val = tensor([-1])]; fp16 var_13727_to_fp16 = const()[name = string("op_13727_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_697_cast_fp16 = layer_norm(axes = normed_697_axes_0, epsilon = var_13727_to_fp16, x = input_647_cast_fp16)[name = string("normed_697_cast_fp16")]; tensor var_13740_split_sizes_0 = const()[name = string("op_13740_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_13740_axis_0 = const()[name = string("op_13740_axis_0"), val = int32(-1)]; tensor var_13740_cast_fp16_0, tensor var_13740_cast_fp16_1 = split(axis = var_13740_axis_0, split_sizes = var_13740_split_sizes_0, x = normed_697_cast_fp16)[name = string("op_13740_cast_fp16")]; tensor const_413_to_fp16 = const()[name = string("const_413_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2290043136)))]; tensor var_13743_cast_fp16 = mul(x = var_13740_cast_fp16_0, y = const_413_to_fp16)[name = string("op_13743_cast_fp16")]; tensor hidden_states_253_cast_fp16 = add(x = x_721_cast_fp16, y = var_13743_cast_fp16)[name = string("hidden_states_253_cast_fp16")]; tensor per_layer_slice_41_begin_0 = const()[name = string("per_layer_slice_41_begin_0"), val = tensor([0, 0, 5120])]; tensor per_layer_slice_41_end_0 = const()[name = string("per_layer_slice_41_end_0"), val = tensor([1, 1, 5376])]; tensor per_layer_slice_41_end_mask_0 = const()[name = string("per_layer_slice_41_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_41 = slice_by_index(begin = per_layer_slice_41_begin_0, end = per_layer_slice_41_end_0, end_mask = per_layer_slice_41_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_41")]; tensor gated_81 = linear(bias = linear_1_bias_0, weight = layers_20_per_layer_input_gate_weight_palettized, x = hidden_states_253_cast_fp16)[name = string("linear_41")]; string gated_83_mode_0 = const()[name = string("gated_83_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_83 = gelu(mode = gated_83_mode_0, x = gated_81)[name = string("gated_83")]; tensor input_651 = mul(x = gated_83, y = per_layer_slice_41)[name = string("input_651")]; tensor x_729 = linear(bias = linear_2_bias_0, weight = layers_20_per_layer_projection_weight_palettized, x = input_651)[name = string("linear_42")]; int32 var_13780 = const()[name = string("op_13780"), val = int32(-1)]; fp16 const_414_promoted_to_fp16 = const()[name = string("const_414_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_13786_cast_fp16 = mul(x = x_729, y = const_414_promoted_to_fp16)[name = string("op_13786_cast_fp16")]; bool input_653_interleave_0 = const()[name = string("input_653_interleave_0"), val = bool(false)]; tensor input_653_cast_fp16 = concat(axis = var_13780, interleave = input_653_interleave_0, values = (x_729, var_13786_cast_fp16))[name = string("input_653_cast_fp16")]; tensor normed_701_axes_0 = const()[name = string("normed_701_axes_0"), val = tensor([-1])]; fp16 var_13778_to_fp16 = const()[name = string("op_13778_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_701_cast_fp16 = layer_norm(axes = normed_701_axes_0, epsilon = var_13778_to_fp16, x = input_653_cast_fp16)[name = string("normed_701_cast_fp16")]; tensor var_13791_split_sizes_0 = const()[name = string("op_13791_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_13791_axis_0 = const()[name = string("op_13791_axis_0"), val = int32(-1)]; tensor var_13791_cast_fp16_0, tensor var_13791_cast_fp16_1 = split(axis = var_13791_axis_0, split_sizes = var_13791_split_sizes_0, x = normed_701_cast_fp16)[name = string("op_13791_cast_fp16")]; tensor const_415_to_fp16 = const()[name = string("const_415_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2290046272)))]; tensor var_13794_cast_fp16 = mul(x = var_13791_cast_fp16_0, y = const_415_to_fp16)[name = string("op_13794_cast_fp16")]; tensor hidden_states_257_cast_fp16 = add(x = hidden_states_253_cast_fp16, y = var_13794_cast_fp16)[name = string("hidden_states_257_cast_fp16")]; tensor layers_20_layer_scalar_to_fp16 = const()[name = string("layers_20_layer_scalar_to_fp16"), val = tensor([0x1.fap-2])]; tensor x_733_cast_fp16 = mul(x = hidden_states_257_cast_fp16, y = layers_20_layer_scalar_to_fp16)[name = string("x_733_cast_fp16")]; int32 var_13802 = const()[name = string("op_13802"), val = int32(-1)]; fp16 const_416_promoted_to_fp16 = const()[name = string("const_416_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_13808_cast_fp16 = mul(x = x_733_cast_fp16, y = const_416_promoted_to_fp16)[name = string("op_13808_cast_fp16")]; bool input_655_interleave_0 = const()[name = string("input_655_interleave_0"), val = bool(false)]; tensor input_655_cast_fp16 = concat(axis = var_13802, interleave = input_655_interleave_0, values = (x_733_cast_fp16, var_13808_cast_fp16))[name = string("input_655_cast_fp16")]; tensor normed_705_axes_0 = const()[name = string("normed_705_axes_0"), val = tensor([-1])]; fp16 var_13800_to_fp16 = const()[name = string("op_13800_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_705_cast_fp16 = layer_norm(axes = normed_705_axes_0, epsilon = var_13800_to_fp16, x = input_655_cast_fp16)[name = string("normed_705_cast_fp16")]; tensor var_13813_split_sizes_0 = const()[name = string("op_13813_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_13813_axis_0 = const()[name = string("op_13813_axis_0"), val = int32(-1)]; tensor var_13813_cast_fp16_0, tensor var_13813_cast_fp16_1 = split(axis = var_13813_axis_0, split_sizes = var_13813_split_sizes_0, x = normed_705_cast_fp16)[name = string("op_13813_cast_fp16")]; tensor const_417_to_fp16 = const()[name = string("const_417_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2290049408)))]; tensor var_13816_cast_fp16 = mul(x = var_13813_cast_fp16_0, y = const_417_to_fp16)[name = string("op_13816_cast_fp16")]; tensor var_13824 = const()[name = string("op_13824"), val = tensor([0, 2, 1])]; tensor var_13827_axes_0 = const()[name = string("op_13827_axes_0"), val = tensor([2])]; tensor var_13825_cast_fp16 = transpose(perm = var_13824, x = var_13816_cast_fp16)[name = string("transpose_99")]; tensor var_13827_cast_fp16 = expand_dims(axes = var_13827_axes_0, x = var_13825_cast_fp16)[name = string("op_13827_cast_fp16")]; string var_13843_pad_type_0 = const()[name = string("op_13843_pad_type_0"), val = string("valid")]; tensor var_13843_strides_0 = const()[name = string("op_13843_strides_0"), val = tensor([1, 1])]; tensor var_13843_pad_0 = const()[name = string("op_13843_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_13843_dilations_0 = const()[name = string("op_13843_dilations_0"), val = tensor([1, 1])]; int32 var_13843_groups_0 = const()[name = string("op_13843_groups_0"), val = int32(1)]; tensor var_13843 = conv(dilations = var_13843_dilations_0, groups = var_13843_groups_0, pad = var_13843_pad_0, pad_type = var_13843_pad_type_0, strides = var_13843_strides_0, weight = layers_21_self_attn_q_proj_weight_palettized, x = var_13827_cast_fp16)[name = string("op_13843")]; tensor var_13848 = const()[name = string("op_13848"), val = tensor([1, 8, 256, 1])]; tensor var_13849 = reshape(shape = var_13848, x = var_13843)[name = string("op_13849")]; tensor var_13854 = const()[name = string("op_13854"), val = tensor([0, 1, 3, 2])]; tensor var_13864 = const()[name = string("op_13864"), val = tensor([1, 8, 256])]; tensor var_13855 = transpose(perm = var_13854, x = var_13849)[name = string("transpose_98")]; tensor x_737 = reshape(shape = var_13864, x = var_13855)[name = string("x_737")]; int32 var_13870 = const()[name = string("op_13870"), val = int32(-1)]; fp16 const_418_promoted_to_fp16 = const()[name = string("const_418_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_13876_cast_fp16 = mul(x = x_737, y = const_418_promoted_to_fp16)[name = string("op_13876_cast_fp16")]; bool input_659_interleave_0 = const()[name = string("input_659_interleave_0"), val = bool(false)]; tensor input_659_cast_fp16 = concat(axis = var_13870, interleave = input_659_interleave_0, values = (x_737, var_13876_cast_fp16))[name = string("input_659_cast_fp16")]; tensor normed_709_axes_0 = const()[name = string("normed_709_axes_0"), val = tensor([-1])]; fp16 var_13868_to_fp16 = const()[name = string("op_13868_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_709_cast_fp16 = layer_norm(axes = normed_709_axes_0, epsilon = var_13868_to_fp16, x = input_659_cast_fp16)[name = string("normed_709_cast_fp16")]; tensor var_13881_split_sizes_0 = const()[name = string("op_13881_split_sizes_0"), val = tensor([256, 256])]; int32 var_13881_axis_0 = const()[name = string("op_13881_axis_0"), val = int32(-1)]; tensor var_13881_cast_fp16_0, tensor var_13881_cast_fp16_1 = split(axis = var_13881_axis_0, split_sizes = var_13881_split_sizes_0, x = normed_709_cast_fp16)[name = string("op_13881_cast_fp16")]; tensor var_13884_cast_fp16 = mul(x = var_13881_cast_fp16_0, y = const_307_to_fp16)[name = string("op_13884_cast_fp16")]; tensor var_13890 = const()[name = string("op_13890"), val = tensor([1, 8, 1, 256])]; tensor q_159 = reshape(shape = var_13890, x = var_13884_cast_fp16)[name = string("q_159")]; tensor var_13892 = mul(x = q_159, y = cos_1)[name = string("op_13892")]; tensor var_13893_split_sizes_0 = const()[name = string("op_13893_split_sizes_0"), val = tensor([128, 128])]; int32 var_13893_axis_0 = const()[name = string("op_13893_axis_0"), val = int32(-1)]; tensor var_13893_0, tensor var_13893_1 = split(axis = var_13893_axis_0, split_sizes = var_13893_split_sizes_0, x = q_159)[name = string("op_13893")]; fp16 const_420_promoted = const()[name = string("const_420_promoted"), val = fp16(-0x1p+0)]; tensor var_13895 = mul(x = var_13893_1, y = const_420_promoted)[name = string("op_13895")]; int32 var_13897 = const()[name = string("op_13897"), val = int32(-1)]; bool var_13898_interleave_0 = const()[name = string("op_13898_interleave_0"), val = bool(false)]; tensor var_13898 = concat(axis = var_13897, interleave = var_13898_interleave_0, values = (var_13895, var_13893_0))[name = string("op_13898")]; tensor var_13899 = mul(x = var_13898, y = sin_1)[name = string("op_13899")]; tensor q_161 = add(x = var_13892, y = var_13899)[name = string("q_161")]; bool var_13923_transpose_x_0 = const()[name = string("op_13923_transpose_x_0"), val = bool(false)]; bool var_13923_transpose_y_0 = const()[name = string("op_13923_transpose_y_0"), val = bool(false)]; tensor var_13923_cast_fp16 = matmul(transpose_x = var_13923_transpose_x_0, transpose_y = var_13923_transpose_y_0, x = q_161, y = transpose_153_cast_fp16)[name = string("op_13923_cast_fp16")]; tensor attn_weights_129_cast_fp16 = add(x = var_13923_cast_fp16, y = causal_mask)[name = string("attn_weights_129_cast_fp16")]; int32 var_13933 = const()[name = string("op_13933"), val = int32(-1)]; tensor var_13935_cast_fp16 = softmax(axis = var_13933, x = attn_weights_129_cast_fp16)[name = string("op_13935_cast_fp16")]; bool var_13951_transpose_x_0 = const()[name = string("op_13951_transpose_x_0"), val = bool(false)]; bool var_13951_transpose_y_0 = const()[name = string("op_13951_transpose_y_0"), val = bool(false)]; tensor var_13951_cast_fp16 = matmul(transpose_x = var_13951_transpose_x_0, transpose_y = var_13951_transpose_y_0, x = var_13935_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_13951_cast_fp16")]; tensor var_13961 = const()[name = string("op_13961"), val = tensor([0, 2, 1, 3])]; tensor var_13968 = const()[name = string("op_13968"), val = tensor([1, 1, -1])]; tensor var_13962 = transpose(perm = var_13961, x = var_13951_cast_fp16)[name = string("transpose_97")]; tensor attn_output_129 = reshape(shape = var_13968, x = var_13962)[name = string("attn_output_129")]; tensor var_13973 = const()[name = string("op_13973"), val = tensor([0, 2, 1])]; tensor squeeze_21_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2290052544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2291625472))))[name = string("squeeze_21_palettized")]; string var_13989_pad_type_0 = const()[name = string("op_13989_pad_type_0"), val = string("valid")]; int32 var_13989_groups_0 = const()[name = string("op_13989_groups_0"), val = int32(1)]; tensor var_13989_strides_0 = const()[name = string("op_13989_strides_0"), val = tensor([1])]; tensor var_13989_pad_0 = const()[name = string("op_13989_pad_0"), val = tensor([0, 0])]; tensor var_13989_dilations_0 = const()[name = string("op_13989_dilations_0"), val = tensor([1])]; tensor var_13974 = transpose(perm = var_13973, x = attn_output_129)[name = string("transpose_96")]; tensor var_13989 = conv(dilations = var_13989_dilations_0, groups = var_13989_groups_0, pad = var_13989_pad_0, pad_type = var_13989_pad_type_0, strides = var_13989_strides_0, weight = squeeze_21_palettized, x = var_13974)[name = string("op_13989")]; tensor var_13993 = const()[name = string("op_13993"), val = tensor([0, 2, 1])]; int32 var_13999 = const()[name = string("op_13999"), val = int32(-1)]; fp16 const_421_promoted_to_fp16 = const()[name = string("const_421_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_741 = transpose(perm = var_13993, x = var_13989)[name = string("transpose_95")]; tensor var_14005_cast_fp16 = mul(x = x_741, y = const_421_promoted_to_fp16)[name = string("op_14005_cast_fp16")]; bool input_663_interleave_0 = const()[name = string("input_663_interleave_0"), val = bool(false)]; tensor input_663_cast_fp16 = concat(axis = var_13999, interleave = input_663_interleave_0, values = (x_741, var_14005_cast_fp16))[name = string("input_663_cast_fp16")]; tensor normed_713_axes_0 = const()[name = string("normed_713_axes_0"), val = tensor([-1])]; fp16 var_13997_to_fp16 = const()[name = string("op_13997_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_713_cast_fp16 = layer_norm(axes = normed_713_axes_0, epsilon = var_13997_to_fp16, x = input_663_cast_fp16)[name = string("normed_713_cast_fp16")]; tensor var_14010_split_sizes_0 = const()[name = string("op_14010_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_14010_axis_0 = const()[name = string("op_14010_axis_0"), val = int32(-1)]; tensor var_14010_cast_fp16_0, tensor var_14010_cast_fp16_1 = split(axis = var_14010_axis_0, split_sizes = var_14010_split_sizes_0, x = normed_713_cast_fp16)[name = string("op_14010_cast_fp16")]; tensor const_422_to_fp16 = const()[name = string("const_422_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2291627072)))]; tensor var_14013_cast_fp16 = mul(x = var_14010_cast_fp16_0, y = const_422_to_fp16)[name = string("op_14013_cast_fp16")]; tensor x_745_cast_fp16 = add(x = x_733_cast_fp16, y = var_14013_cast_fp16)[name = string("x_745_cast_fp16")]; int32 var_14020 = const()[name = string("op_14020"), val = int32(-1)]; fp16 const_423_promoted_to_fp16 = const()[name = string("const_423_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_14026_cast_fp16 = mul(x = x_745_cast_fp16, y = const_423_promoted_to_fp16)[name = string("op_14026_cast_fp16")]; bool input_665_interleave_0 = const()[name = string("input_665_interleave_0"), val = bool(false)]; tensor input_665_cast_fp16 = concat(axis = var_14020, interleave = input_665_interleave_0, values = (x_745_cast_fp16, var_14026_cast_fp16))[name = string("input_665_cast_fp16")]; tensor normed_717_axes_0 = const()[name = string("normed_717_axes_0"), val = tensor([-1])]; fp16 var_14018_to_fp16 = const()[name = string("op_14018_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_717_cast_fp16 = layer_norm(axes = normed_717_axes_0, epsilon = var_14018_to_fp16, x = input_665_cast_fp16)[name = string("normed_717_cast_fp16")]; tensor var_14031_split_sizes_0 = const()[name = string("op_14031_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_14031_axis_0 = const()[name = string("op_14031_axis_0"), val = int32(-1)]; tensor var_14031_cast_fp16_0, tensor var_14031_cast_fp16_1 = split(axis = var_14031_axis_0, split_sizes = var_14031_split_sizes_0, x = normed_717_cast_fp16)[name = string("op_14031_cast_fp16")]; tensor const_424_to_fp16 = const()[name = string("const_424_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2291630208)))]; tensor var_14034_cast_fp16 = mul(x = var_14031_cast_fp16_0, y = const_424_to_fp16)[name = string("op_14034_cast_fp16")]; tensor var_14047 = const()[name = string("op_14047"), val = tensor([0, 2, 1])]; tensor input_667_axes_0 = const()[name = string("input_667_axes_0"), val = tensor([2])]; tensor var_14048 = transpose(perm = var_14047, x = var_14034_cast_fp16)[name = string("transpose_94")]; tensor input_667 = expand_dims(axes = input_667_axes_0, x = var_14048)[name = string("input_667")]; string gate_85_pad_type_0 = const()[name = string("gate_85_pad_type_0"), val = string("valid")]; tensor gate_85_strides_0 = const()[name = string("gate_85_strides_0"), val = tensor([1, 1])]; tensor gate_85_pad_0 = const()[name = string("gate_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_85_dilations_0 = const()[name = string("gate_85_dilations_0"), val = tensor([1, 1])]; int32 gate_85_groups_0 = const()[name = string("gate_85_groups_0"), val = int32(1)]; tensor gate_85 = conv(dilations = gate_85_dilations_0, groups = gate_85_groups_0, pad = gate_85_pad_0, pad_type = gate_85_pad_type_0, strides = gate_85_strides_0, weight = layers_21_mlp_gate_proj_weight_palettized, x = input_667)[name = string("gate_85")]; string up_43_pad_type_0 = const()[name = string("up_43_pad_type_0"), val = string("valid")]; tensor up_43_strides_0 = const()[name = string("up_43_strides_0"), val = tensor([1, 1])]; tensor up_43_pad_0 = const()[name = string("up_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_43_dilations_0 = const()[name = string("up_43_dilations_0"), val = tensor([1, 1])]; int32 up_43_groups_0 = const()[name = string("up_43_groups_0"), val = int32(1)]; tensor up_43 = conv(dilations = up_43_dilations_0, groups = up_43_groups_0, pad = up_43_pad_0, pad_type = up_43_pad_type_0, strides = up_43_strides_0, weight = layers_21_mlp_up_proj_weight_palettized, x = input_667)[name = string("up_43")]; string gate_87_mode_0 = const()[name = string("gate_87_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_87 = gelu(mode = gate_87_mode_0, x = gate_85)[name = string("gate_87")]; tensor input_669 = mul(x = gate_87, y = up_43)[name = string("input_669")]; string mlp_out_43_pad_type_0 = const()[name = string("mlp_out_43_pad_type_0"), val = string("valid")]; tensor mlp_out_43_strides_0 = const()[name = string("mlp_out_43_strides_0"), val = tensor([1, 1])]; tensor mlp_out_43_pad_0 = const()[name = string("mlp_out_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_43_dilations_0 = const()[name = string("mlp_out_43_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_43_groups_0 = const()[name = string("mlp_out_43_groups_0"), val = int32(1)]; tensor mlp_out_43 = conv(dilations = mlp_out_43_dilations_0, groups = mlp_out_43_groups_0, pad = mlp_out_43_pad_0, pad_type = mlp_out_43_pad_type_0, strides = mlp_out_43_strides_0, weight = layers_21_mlp_down_proj_weight_palettized, x = input_669)[name = string("mlp_out_43")]; tensor var_14088_axes_0 = const()[name = string("op_14088_axes_0"), val = tensor([2])]; tensor var_14088 = squeeze(axes = var_14088_axes_0, x = mlp_out_43)[name = string("op_14088")]; tensor var_14092 = const()[name = string("op_14092"), val = tensor([0, 2, 1])]; int32 var_14098 = const()[name = string("op_14098"), val = int32(-1)]; fp16 const_425_promoted_to_fp16 = const()[name = string("const_425_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_749 = transpose(perm = var_14092, x = var_14088)[name = string("transpose_93")]; tensor var_14104_cast_fp16 = mul(x = x_749, y = const_425_promoted_to_fp16)[name = string("op_14104_cast_fp16")]; bool input_671_interleave_0 = const()[name = string("input_671_interleave_0"), val = bool(false)]; tensor input_671_cast_fp16 = concat(axis = var_14098, interleave = input_671_interleave_0, values = (x_749, var_14104_cast_fp16))[name = string("input_671_cast_fp16")]; tensor normed_721_axes_0 = const()[name = string("normed_721_axes_0"), val = tensor([-1])]; fp16 var_14096_to_fp16 = const()[name = string("op_14096_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_721_cast_fp16 = layer_norm(axes = normed_721_axes_0, epsilon = var_14096_to_fp16, x = input_671_cast_fp16)[name = string("normed_721_cast_fp16")]; tensor var_14109_split_sizes_0 = const()[name = string("op_14109_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_14109_axis_0 = const()[name = string("op_14109_axis_0"), val = int32(-1)]; tensor var_14109_cast_fp16_0, tensor var_14109_cast_fp16_1 = split(axis = var_14109_axis_0, split_sizes = var_14109_split_sizes_0, x = normed_721_cast_fp16)[name = string("op_14109_cast_fp16")]; tensor const_426_to_fp16 = const()[name = string("const_426_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2291633344)))]; tensor var_14112_cast_fp16 = mul(x = var_14109_cast_fp16_0, y = const_426_to_fp16)[name = string("op_14112_cast_fp16")]; tensor hidden_states_265_cast_fp16 = add(x = x_745_cast_fp16, y = var_14112_cast_fp16)[name = string("hidden_states_265_cast_fp16")]; tensor per_layer_slice_43_begin_0 = const()[name = string("per_layer_slice_43_begin_0"), val = tensor([0, 0, 5376])]; tensor per_layer_slice_43_end_0 = const()[name = string("per_layer_slice_43_end_0"), val = tensor([1, 1, 5632])]; tensor per_layer_slice_43_end_mask_0 = const()[name = string("per_layer_slice_43_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_43 = slice_by_index(begin = per_layer_slice_43_begin_0, end = per_layer_slice_43_end_0, end_mask = per_layer_slice_43_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_43")]; tensor gated_85 = linear(bias = linear_1_bias_0, weight = layers_21_per_layer_input_gate_weight_palettized, x = hidden_states_265_cast_fp16)[name = string("linear_43")]; string gated_87_mode_0 = const()[name = string("gated_87_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_87 = gelu(mode = gated_87_mode_0, x = gated_85)[name = string("gated_87")]; tensor input_675 = mul(x = gated_87, y = per_layer_slice_43)[name = string("input_675")]; tensor x_753 = linear(bias = linear_2_bias_0, weight = layers_21_per_layer_projection_weight_palettized, x = input_675)[name = string("linear_44")]; int32 var_14149 = const()[name = string("op_14149"), val = int32(-1)]; fp16 const_427_promoted_to_fp16 = const()[name = string("const_427_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_14155_cast_fp16 = mul(x = x_753, y = const_427_promoted_to_fp16)[name = string("op_14155_cast_fp16")]; bool input_677_interleave_0 = const()[name = string("input_677_interleave_0"), val = bool(false)]; tensor input_677_cast_fp16 = concat(axis = var_14149, interleave = input_677_interleave_0, values = (x_753, var_14155_cast_fp16))[name = string("input_677_cast_fp16")]; tensor normed_725_axes_0 = const()[name = string("normed_725_axes_0"), val = tensor([-1])]; fp16 var_14147_to_fp16 = const()[name = string("op_14147_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_725_cast_fp16 = layer_norm(axes = normed_725_axes_0, epsilon = var_14147_to_fp16, x = input_677_cast_fp16)[name = string("normed_725_cast_fp16")]; tensor var_14160_split_sizes_0 = const()[name = string("op_14160_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_14160_axis_0 = const()[name = string("op_14160_axis_0"), val = int32(-1)]; tensor var_14160_cast_fp16_0, tensor var_14160_cast_fp16_1 = split(axis = var_14160_axis_0, split_sizes = var_14160_split_sizes_0, x = normed_725_cast_fp16)[name = string("op_14160_cast_fp16")]; tensor const_428_to_fp16 = const()[name = string("const_428_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2291636480)))]; tensor var_14163_cast_fp16 = mul(x = var_14160_cast_fp16_0, y = const_428_to_fp16)[name = string("op_14163_cast_fp16")]; tensor hidden_states_269_cast_fp16 = add(x = hidden_states_265_cast_fp16, y = var_14163_cast_fp16)[name = string("hidden_states_269_cast_fp16")]; tensor layers_21_layer_scalar_to_fp16 = const()[name = string("layers_21_layer_scalar_to_fp16"), val = tensor([0x1.4ap-1])]; tensor x_757_cast_fp16 = mul(x = hidden_states_269_cast_fp16, y = layers_21_layer_scalar_to_fp16)[name = string("x_757_cast_fp16")]; int32 var_14171 = const()[name = string("op_14171"), val = int32(-1)]; fp16 const_429_promoted_to_fp16 = const()[name = string("const_429_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_14177_cast_fp16 = mul(x = x_757_cast_fp16, y = const_429_promoted_to_fp16)[name = string("op_14177_cast_fp16")]; bool input_679_interleave_0 = const()[name = string("input_679_interleave_0"), val = bool(false)]; tensor input_679_cast_fp16 = concat(axis = var_14171, interleave = input_679_interleave_0, values = (x_757_cast_fp16, var_14177_cast_fp16))[name = string("input_679_cast_fp16")]; tensor normed_729_axes_0 = const()[name = string("normed_729_axes_0"), val = tensor([-1])]; fp16 var_14169_to_fp16 = const()[name = string("op_14169_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_729_cast_fp16 = layer_norm(axes = normed_729_axes_0, epsilon = var_14169_to_fp16, x = input_679_cast_fp16)[name = string("normed_729_cast_fp16")]; tensor var_14182_split_sizes_0 = const()[name = string("op_14182_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_14182_axis_0 = const()[name = string("op_14182_axis_0"), val = int32(-1)]; tensor var_14182_cast_fp16_0, tensor var_14182_cast_fp16_1 = split(axis = var_14182_axis_0, split_sizes = var_14182_split_sizes_0, x = normed_729_cast_fp16)[name = string("op_14182_cast_fp16")]; tensor const_430_to_fp16 = const()[name = string("const_430_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2291639616)))]; tensor var_14185_cast_fp16 = mul(x = var_14182_cast_fp16_0, y = const_430_to_fp16)[name = string("op_14185_cast_fp16")]; tensor var_14193 = const()[name = string("op_14193"), val = tensor([0, 2, 1])]; tensor var_14196_axes_0 = const()[name = string("op_14196_axes_0"), val = tensor([2])]; tensor var_14194_cast_fp16 = transpose(perm = var_14193, x = var_14185_cast_fp16)[name = string("transpose_92")]; tensor var_14196_cast_fp16 = expand_dims(axes = var_14196_axes_0, x = var_14194_cast_fp16)[name = string("op_14196_cast_fp16")]; string var_14212_pad_type_0 = const()[name = string("op_14212_pad_type_0"), val = string("valid")]; tensor var_14212_strides_0 = const()[name = string("op_14212_strides_0"), val = tensor([1, 1])]; tensor var_14212_pad_0 = const()[name = string("op_14212_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14212_dilations_0 = const()[name = string("op_14212_dilations_0"), val = tensor([1, 1])]; int32 var_14212_groups_0 = const()[name = string("op_14212_groups_0"), val = int32(1)]; tensor var_14212 = conv(dilations = var_14212_dilations_0, groups = var_14212_groups_0, pad = var_14212_pad_0, pad_type = var_14212_pad_type_0, strides = var_14212_strides_0, weight = layers_22_self_attn_q_proj_weight_palettized, x = var_14196_cast_fp16)[name = string("op_14212")]; tensor var_14217 = const()[name = string("op_14217"), val = tensor([1, 8, 256, 1])]; tensor var_14218 = reshape(shape = var_14217, x = var_14212)[name = string("op_14218")]; tensor var_14223 = const()[name = string("op_14223"), val = tensor([0, 1, 3, 2])]; tensor var_14233 = const()[name = string("op_14233"), val = tensor([1, 8, 256])]; tensor var_14224 = transpose(perm = var_14223, x = var_14218)[name = string("transpose_91")]; tensor x_761 = reshape(shape = var_14233, x = var_14224)[name = string("x_761")]; int32 var_14239 = const()[name = string("op_14239"), val = int32(-1)]; fp16 const_431_promoted_to_fp16 = const()[name = string("const_431_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_14245_cast_fp16 = mul(x = x_761, y = const_431_promoted_to_fp16)[name = string("op_14245_cast_fp16")]; bool input_683_interleave_0 = const()[name = string("input_683_interleave_0"), val = bool(false)]; tensor input_683_cast_fp16 = concat(axis = var_14239, interleave = input_683_interleave_0, values = (x_761, var_14245_cast_fp16))[name = string("input_683_cast_fp16")]; tensor normed_733_axes_0 = const()[name = string("normed_733_axes_0"), val = tensor([-1])]; fp16 var_14237_to_fp16 = const()[name = string("op_14237_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_733_cast_fp16 = layer_norm(axes = normed_733_axes_0, epsilon = var_14237_to_fp16, x = input_683_cast_fp16)[name = string("normed_733_cast_fp16")]; tensor var_14250_split_sizes_0 = const()[name = string("op_14250_split_sizes_0"), val = tensor([256, 256])]; int32 var_14250_axis_0 = const()[name = string("op_14250_axis_0"), val = int32(-1)]; tensor var_14250_cast_fp16_0, tensor var_14250_cast_fp16_1 = split(axis = var_14250_axis_0, split_sizes = var_14250_split_sizes_0, x = normed_733_cast_fp16)[name = string("op_14250_cast_fp16")]; tensor var_14253_cast_fp16 = mul(x = var_14250_cast_fp16_0, y = const_307_to_fp16)[name = string("op_14253_cast_fp16")]; tensor var_14259 = const()[name = string("op_14259"), val = tensor([1, 8, 1, 256])]; tensor q_165 = reshape(shape = var_14259, x = var_14253_cast_fp16)[name = string("q_165")]; tensor var_14261 = mul(x = q_165, y = cos_1)[name = string("op_14261")]; tensor var_14262_split_sizes_0 = const()[name = string("op_14262_split_sizes_0"), val = tensor([128, 128])]; int32 var_14262_axis_0 = const()[name = string("op_14262_axis_0"), val = int32(-1)]; tensor var_14262_0, tensor var_14262_1 = split(axis = var_14262_axis_0, split_sizes = var_14262_split_sizes_0, x = q_165)[name = string("op_14262")]; fp16 const_433_promoted = const()[name = string("const_433_promoted"), val = fp16(-0x1p+0)]; tensor var_14264 = mul(x = var_14262_1, y = const_433_promoted)[name = string("op_14264")]; int32 var_14266 = const()[name = string("op_14266"), val = int32(-1)]; bool var_14267_interleave_0 = const()[name = string("op_14267_interleave_0"), val = bool(false)]; tensor var_14267 = concat(axis = var_14266, interleave = var_14267_interleave_0, values = (var_14264, var_14262_0))[name = string("op_14267")]; tensor var_14268 = mul(x = var_14267, y = sin_1)[name = string("op_14268")]; tensor q_167 = add(x = var_14261, y = var_14268)[name = string("q_167")]; bool var_14292_transpose_x_0 = const()[name = string("op_14292_transpose_x_0"), val = bool(false)]; bool var_14292_transpose_y_0 = const()[name = string("op_14292_transpose_y_0"), val = bool(false)]; tensor var_14292_cast_fp16 = matmul(transpose_x = var_14292_transpose_x_0, transpose_y = var_14292_transpose_y_0, x = q_167, y = transpose_153_cast_fp16)[name = string("op_14292_cast_fp16")]; tensor attn_weights_135_cast_fp16 = add(x = var_14292_cast_fp16, y = causal_mask)[name = string("attn_weights_135_cast_fp16")]; int32 var_14302 = const()[name = string("op_14302"), val = int32(-1)]; tensor var_14304_cast_fp16 = softmax(axis = var_14302, x = attn_weights_135_cast_fp16)[name = string("op_14304_cast_fp16")]; bool var_14320_transpose_x_0 = const()[name = string("op_14320_transpose_x_0"), val = bool(false)]; bool var_14320_transpose_y_0 = const()[name = string("op_14320_transpose_y_0"), val = bool(false)]; tensor var_14320_cast_fp16 = matmul(transpose_x = var_14320_transpose_x_0, transpose_y = var_14320_transpose_y_0, x = var_14304_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_14320_cast_fp16")]; tensor var_14330 = const()[name = string("op_14330"), val = tensor([0, 2, 1, 3])]; tensor var_14337 = const()[name = string("op_14337"), val = tensor([1, 1, -1])]; tensor var_14331 = transpose(perm = var_14330, x = var_14320_cast_fp16)[name = string("transpose_90")]; tensor attn_output_135 = reshape(shape = var_14337, x = var_14331)[name = string("attn_output_135")]; tensor var_14342 = const()[name = string("op_14342"), val = tensor([0, 2, 1])]; tensor squeeze_22_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2291642752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2293215680))))[name = string("squeeze_22_palettized")]; string var_14358_pad_type_0 = const()[name = string("op_14358_pad_type_0"), val = string("valid")]; int32 var_14358_groups_0 = const()[name = string("op_14358_groups_0"), val = int32(1)]; tensor var_14358_strides_0 = const()[name = string("op_14358_strides_0"), val = tensor([1])]; tensor var_14358_pad_0 = const()[name = string("op_14358_pad_0"), val = tensor([0, 0])]; tensor var_14358_dilations_0 = const()[name = string("op_14358_dilations_0"), val = tensor([1])]; tensor var_14343 = transpose(perm = var_14342, x = attn_output_135)[name = string("transpose_89")]; tensor var_14358 = conv(dilations = var_14358_dilations_0, groups = var_14358_groups_0, pad = var_14358_pad_0, pad_type = var_14358_pad_type_0, strides = var_14358_strides_0, weight = squeeze_22_palettized, x = var_14343)[name = string("op_14358")]; tensor var_14362 = const()[name = string("op_14362"), val = tensor([0, 2, 1])]; int32 var_14368 = const()[name = string("op_14368"), val = int32(-1)]; fp16 const_434_promoted_to_fp16 = const()[name = string("const_434_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_765 = transpose(perm = var_14362, x = var_14358)[name = string("transpose_88")]; tensor var_14374_cast_fp16 = mul(x = x_765, y = const_434_promoted_to_fp16)[name = string("op_14374_cast_fp16")]; bool input_687_interleave_0 = const()[name = string("input_687_interleave_0"), val = bool(false)]; tensor input_687_cast_fp16 = concat(axis = var_14368, interleave = input_687_interleave_0, values = (x_765, var_14374_cast_fp16))[name = string("input_687_cast_fp16")]; tensor normed_737_axes_0 = const()[name = string("normed_737_axes_0"), val = tensor([-1])]; fp16 var_14366_to_fp16 = const()[name = string("op_14366_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_737_cast_fp16 = layer_norm(axes = normed_737_axes_0, epsilon = var_14366_to_fp16, x = input_687_cast_fp16)[name = string("normed_737_cast_fp16")]; tensor var_14379_split_sizes_0 = const()[name = string("op_14379_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_14379_axis_0 = const()[name = string("op_14379_axis_0"), val = int32(-1)]; tensor var_14379_cast_fp16_0, tensor var_14379_cast_fp16_1 = split(axis = var_14379_axis_0, split_sizes = var_14379_split_sizes_0, x = normed_737_cast_fp16)[name = string("op_14379_cast_fp16")]; tensor const_435_to_fp16 = const()[name = string("const_435_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2293217280)))]; tensor var_14382_cast_fp16 = mul(x = var_14379_cast_fp16_0, y = const_435_to_fp16)[name = string("op_14382_cast_fp16")]; tensor x_769_cast_fp16 = add(x = x_757_cast_fp16, y = var_14382_cast_fp16)[name = string("x_769_cast_fp16")]; int32 var_14389 = const()[name = string("op_14389"), val = int32(-1)]; fp16 const_436_promoted_to_fp16 = const()[name = string("const_436_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_14395_cast_fp16 = mul(x = x_769_cast_fp16, y = const_436_promoted_to_fp16)[name = string("op_14395_cast_fp16")]; bool input_689_interleave_0 = const()[name = string("input_689_interleave_0"), val = bool(false)]; tensor input_689_cast_fp16 = concat(axis = var_14389, interleave = input_689_interleave_0, values = (x_769_cast_fp16, var_14395_cast_fp16))[name = string("input_689_cast_fp16")]; tensor normed_741_axes_0 = const()[name = string("normed_741_axes_0"), val = tensor([-1])]; fp16 var_14387_to_fp16 = const()[name = string("op_14387_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_741_cast_fp16 = layer_norm(axes = normed_741_axes_0, epsilon = var_14387_to_fp16, x = input_689_cast_fp16)[name = string("normed_741_cast_fp16")]; tensor var_14400_split_sizes_0 = const()[name = string("op_14400_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_14400_axis_0 = const()[name = string("op_14400_axis_0"), val = int32(-1)]; tensor var_14400_cast_fp16_0, tensor var_14400_cast_fp16_1 = split(axis = var_14400_axis_0, split_sizes = var_14400_split_sizes_0, x = normed_741_cast_fp16)[name = string("op_14400_cast_fp16")]; tensor const_437_to_fp16 = const()[name = string("const_437_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2293220416)))]; tensor var_14403_cast_fp16 = mul(x = var_14400_cast_fp16_0, y = const_437_to_fp16)[name = string("op_14403_cast_fp16")]; tensor var_14416 = const()[name = string("op_14416"), val = tensor([0, 2, 1])]; tensor input_691_axes_0 = const()[name = string("input_691_axes_0"), val = tensor([2])]; tensor var_14417 = transpose(perm = var_14416, x = var_14403_cast_fp16)[name = string("transpose_87")]; tensor input_691 = expand_dims(axes = input_691_axes_0, x = var_14417)[name = string("input_691")]; string gate_89_pad_type_0 = const()[name = string("gate_89_pad_type_0"), val = string("valid")]; tensor gate_89_strides_0 = const()[name = string("gate_89_strides_0"), val = tensor([1, 1])]; tensor gate_89_pad_0 = const()[name = string("gate_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_89_dilations_0 = const()[name = string("gate_89_dilations_0"), val = tensor([1, 1])]; int32 gate_89_groups_0 = const()[name = string("gate_89_groups_0"), val = int32(1)]; tensor gate_89 = conv(dilations = gate_89_dilations_0, groups = gate_89_groups_0, pad = gate_89_pad_0, pad_type = gate_89_pad_type_0, strides = gate_89_strides_0, weight = layers_22_mlp_gate_proj_weight_palettized, x = input_691)[name = string("gate_89")]; string up_45_pad_type_0 = const()[name = string("up_45_pad_type_0"), val = string("valid")]; tensor up_45_strides_0 = const()[name = string("up_45_strides_0"), val = tensor([1, 1])]; tensor up_45_pad_0 = const()[name = string("up_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_45_dilations_0 = const()[name = string("up_45_dilations_0"), val = tensor([1, 1])]; int32 up_45_groups_0 = const()[name = string("up_45_groups_0"), val = int32(1)]; tensor up_45 = conv(dilations = up_45_dilations_0, groups = up_45_groups_0, pad = up_45_pad_0, pad_type = up_45_pad_type_0, strides = up_45_strides_0, weight = layers_22_mlp_up_proj_weight_palettized, x = input_691)[name = string("up_45")]; string gate_91_mode_0 = const()[name = string("gate_91_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_91 = gelu(mode = gate_91_mode_0, x = gate_89)[name = string("gate_91")]; tensor input_693 = mul(x = gate_91, y = up_45)[name = string("input_693")]; string mlp_out_45_pad_type_0 = const()[name = string("mlp_out_45_pad_type_0"), val = string("valid")]; tensor mlp_out_45_strides_0 = const()[name = string("mlp_out_45_strides_0"), val = tensor([1, 1])]; tensor mlp_out_45_pad_0 = const()[name = string("mlp_out_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_45_dilations_0 = const()[name = string("mlp_out_45_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_45_groups_0 = const()[name = string("mlp_out_45_groups_0"), val = int32(1)]; tensor mlp_out_45 = conv(dilations = mlp_out_45_dilations_0, groups = mlp_out_45_groups_0, pad = mlp_out_45_pad_0, pad_type = mlp_out_45_pad_type_0, strides = mlp_out_45_strides_0, weight = layers_22_mlp_down_proj_weight_palettized, x = input_693)[name = string("mlp_out_45")]; tensor var_14457_axes_0 = const()[name = string("op_14457_axes_0"), val = tensor([2])]; tensor var_14457 = squeeze(axes = var_14457_axes_0, x = mlp_out_45)[name = string("op_14457")]; tensor var_14461 = const()[name = string("op_14461"), val = tensor([0, 2, 1])]; int32 var_14467 = const()[name = string("op_14467"), val = int32(-1)]; fp16 const_438_promoted_to_fp16 = const()[name = string("const_438_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_773 = transpose(perm = var_14461, x = var_14457)[name = string("transpose_86")]; tensor var_14473_cast_fp16 = mul(x = x_773, y = const_438_promoted_to_fp16)[name = string("op_14473_cast_fp16")]; bool input_695_interleave_0 = const()[name = string("input_695_interleave_0"), val = bool(false)]; tensor input_695_cast_fp16 = concat(axis = var_14467, interleave = input_695_interleave_0, values = (x_773, var_14473_cast_fp16))[name = string("input_695_cast_fp16")]; tensor normed_745_axes_0 = const()[name = string("normed_745_axes_0"), val = tensor([-1])]; fp16 var_14465_to_fp16 = const()[name = string("op_14465_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_745_cast_fp16 = layer_norm(axes = normed_745_axes_0, epsilon = var_14465_to_fp16, x = input_695_cast_fp16)[name = string("normed_745_cast_fp16")]; tensor var_14478_split_sizes_0 = const()[name = string("op_14478_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_14478_axis_0 = const()[name = string("op_14478_axis_0"), val = int32(-1)]; tensor var_14478_cast_fp16_0, tensor var_14478_cast_fp16_1 = split(axis = var_14478_axis_0, split_sizes = var_14478_split_sizes_0, x = normed_745_cast_fp16)[name = string("op_14478_cast_fp16")]; tensor const_439_to_fp16 = const()[name = string("const_439_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2293223552)))]; tensor var_14481_cast_fp16 = mul(x = var_14478_cast_fp16_0, y = const_439_to_fp16)[name = string("op_14481_cast_fp16")]; tensor hidden_states_277_cast_fp16 = add(x = x_769_cast_fp16, y = var_14481_cast_fp16)[name = string("hidden_states_277_cast_fp16")]; tensor per_layer_slice_45_begin_0 = const()[name = string("per_layer_slice_45_begin_0"), val = tensor([0, 0, 5632])]; tensor per_layer_slice_45_end_0 = const()[name = string("per_layer_slice_45_end_0"), val = tensor([1, 1, 5888])]; tensor per_layer_slice_45_end_mask_0 = const()[name = string("per_layer_slice_45_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_45 = slice_by_index(begin = per_layer_slice_45_begin_0, end = per_layer_slice_45_end_0, end_mask = per_layer_slice_45_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_45")]; tensor gated_89 = linear(bias = linear_1_bias_0, weight = layers_22_per_layer_input_gate_weight_palettized, x = hidden_states_277_cast_fp16)[name = string("linear_45")]; string gated_91_mode_0 = const()[name = string("gated_91_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_91 = gelu(mode = gated_91_mode_0, x = gated_89)[name = string("gated_91")]; tensor input_699 = mul(x = gated_91, y = per_layer_slice_45)[name = string("input_699")]; tensor x_777 = linear(bias = linear_2_bias_0, weight = layers_22_per_layer_projection_weight_palettized, x = input_699)[name = string("linear_46")]; int32 var_14518 = const()[name = string("op_14518"), val = int32(-1)]; fp16 const_440_promoted_to_fp16 = const()[name = string("const_440_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_14524_cast_fp16 = mul(x = x_777, y = const_440_promoted_to_fp16)[name = string("op_14524_cast_fp16")]; bool input_701_interleave_0 = const()[name = string("input_701_interleave_0"), val = bool(false)]; tensor input_701_cast_fp16 = concat(axis = var_14518, interleave = input_701_interleave_0, values = (x_777, var_14524_cast_fp16))[name = string("input_701_cast_fp16")]; tensor normed_749_axes_0 = const()[name = string("normed_749_axes_0"), val = tensor([-1])]; fp16 var_14516_to_fp16 = const()[name = string("op_14516_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_749_cast_fp16 = layer_norm(axes = normed_749_axes_0, epsilon = var_14516_to_fp16, x = input_701_cast_fp16)[name = string("normed_749_cast_fp16")]; tensor var_14529_split_sizes_0 = const()[name = string("op_14529_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_14529_axis_0 = const()[name = string("op_14529_axis_0"), val = int32(-1)]; tensor var_14529_cast_fp16_0, tensor var_14529_cast_fp16_1 = split(axis = var_14529_axis_0, split_sizes = var_14529_split_sizes_0, x = normed_749_cast_fp16)[name = string("op_14529_cast_fp16")]; tensor const_441_to_fp16 = const()[name = string("const_441_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2293226688)))]; tensor var_14532_cast_fp16 = mul(x = var_14529_cast_fp16_0, y = const_441_to_fp16)[name = string("op_14532_cast_fp16")]; tensor hidden_states_281_cast_fp16 = add(x = hidden_states_277_cast_fp16, y = var_14532_cast_fp16)[name = string("hidden_states_281_cast_fp16")]; tensor layers_22_layer_scalar_to_fp16 = const()[name = string("layers_22_layer_scalar_to_fp16"), val = tensor([0x1.44p-1])]; tensor x_781_cast_fp16 = mul(x = hidden_states_281_cast_fp16, y = layers_22_layer_scalar_to_fp16)[name = string("x_781_cast_fp16")]; int32 var_14540 = const()[name = string("op_14540"), val = int32(-1)]; fp16 const_442_promoted_to_fp16 = const()[name = string("const_442_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_14546_cast_fp16 = mul(x = x_781_cast_fp16, y = const_442_promoted_to_fp16)[name = string("op_14546_cast_fp16")]; bool input_703_interleave_0 = const()[name = string("input_703_interleave_0"), val = bool(false)]; tensor input_703_cast_fp16 = concat(axis = var_14540, interleave = input_703_interleave_0, values = (x_781_cast_fp16, var_14546_cast_fp16))[name = string("input_703_cast_fp16")]; tensor normed_753_axes_0 = const()[name = string("normed_753_axes_0"), val = tensor([-1])]; fp16 var_14538_to_fp16 = const()[name = string("op_14538_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_753_cast_fp16 = layer_norm(axes = normed_753_axes_0, epsilon = var_14538_to_fp16, x = input_703_cast_fp16)[name = string("normed_753_cast_fp16")]; tensor var_14551_split_sizes_0 = const()[name = string("op_14551_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_14551_axis_0 = const()[name = string("op_14551_axis_0"), val = int32(-1)]; tensor var_14551_cast_fp16_0, tensor var_14551_cast_fp16_1 = split(axis = var_14551_axis_0, split_sizes = var_14551_split_sizes_0, x = normed_753_cast_fp16)[name = string("op_14551_cast_fp16")]; tensor const_443_to_fp16 = const()[name = string("const_443_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2293229824)))]; tensor var_14554_cast_fp16 = mul(x = var_14551_cast_fp16_0, y = const_443_to_fp16)[name = string("op_14554_cast_fp16")]; tensor var_14562 = const()[name = string("op_14562"), val = tensor([0, 2, 1])]; tensor var_14565_axes_0 = const()[name = string("op_14565_axes_0"), val = tensor([2])]; tensor var_14563_cast_fp16 = transpose(perm = var_14562, x = var_14554_cast_fp16)[name = string("transpose_85")]; tensor var_14565_cast_fp16 = expand_dims(axes = var_14565_axes_0, x = var_14563_cast_fp16)[name = string("op_14565_cast_fp16")]; string var_14581_pad_type_0 = const()[name = string("op_14581_pad_type_0"), val = string("valid")]; tensor var_14581_strides_0 = const()[name = string("op_14581_strides_0"), val = tensor([1, 1])]; tensor var_14581_pad_0 = const()[name = string("op_14581_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14581_dilations_0 = const()[name = string("op_14581_dilations_0"), val = tensor([1, 1])]; int32 var_14581_groups_0 = const()[name = string("op_14581_groups_0"), val = int32(1)]; tensor var_14581 = conv(dilations = var_14581_dilations_0, groups = var_14581_groups_0, pad = var_14581_pad_0, pad_type = var_14581_pad_type_0, strides = var_14581_strides_0, weight = layers_23_self_attn_q_proj_weight_palettized, x = var_14565_cast_fp16)[name = string("op_14581")]; tensor var_14586 = const()[name = string("op_14586"), val = tensor([1, 8, 256, 1])]; tensor var_14587 = reshape(shape = var_14586, x = var_14581)[name = string("op_14587")]; tensor var_14592 = const()[name = string("op_14592"), val = tensor([0, 1, 3, 2])]; tensor var_14602 = const()[name = string("op_14602"), val = tensor([1, 8, 256])]; tensor var_14593 = transpose(perm = var_14592, x = var_14587)[name = string("transpose_84")]; tensor x_785 = reshape(shape = var_14602, x = var_14593)[name = string("x_785")]; int32 var_14608 = const()[name = string("op_14608"), val = int32(-1)]; fp16 const_444_promoted_to_fp16 = const()[name = string("const_444_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_14614_cast_fp16 = mul(x = x_785, y = const_444_promoted_to_fp16)[name = string("op_14614_cast_fp16")]; bool input_707_interleave_0 = const()[name = string("input_707_interleave_0"), val = bool(false)]; tensor input_707_cast_fp16 = concat(axis = var_14608, interleave = input_707_interleave_0, values = (x_785, var_14614_cast_fp16))[name = string("input_707_cast_fp16")]; tensor normed_757_axes_0 = const()[name = string("normed_757_axes_0"), val = tensor([-1])]; fp16 var_14606_to_fp16 = const()[name = string("op_14606_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_757_cast_fp16 = layer_norm(axes = normed_757_axes_0, epsilon = var_14606_to_fp16, x = input_707_cast_fp16)[name = string("normed_757_cast_fp16")]; tensor var_14619_split_sizes_0 = const()[name = string("op_14619_split_sizes_0"), val = tensor([256, 256])]; int32 var_14619_axis_0 = const()[name = string("op_14619_axis_0"), val = int32(-1)]; tensor var_14619_cast_fp16_0, tensor var_14619_cast_fp16_1 = split(axis = var_14619_axis_0, split_sizes = var_14619_split_sizes_0, x = normed_757_cast_fp16)[name = string("op_14619_cast_fp16")]; tensor var_14622_cast_fp16 = mul(x = var_14619_cast_fp16_0, y = const_307_to_fp16)[name = string("op_14622_cast_fp16")]; tensor var_14628 = const()[name = string("op_14628"), val = tensor([1, 8, 1, 256])]; tensor q_171 = reshape(shape = var_14628, x = var_14622_cast_fp16)[name = string("q_171")]; tensor var_14630 = mul(x = q_171, y = cos_1)[name = string("op_14630")]; tensor var_14631_split_sizes_0 = const()[name = string("op_14631_split_sizes_0"), val = tensor([128, 128])]; int32 var_14631_axis_0 = const()[name = string("op_14631_axis_0"), val = int32(-1)]; tensor var_14631_0, tensor var_14631_1 = split(axis = var_14631_axis_0, split_sizes = var_14631_split_sizes_0, x = q_171)[name = string("op_14631")]; fp16 const_446_promoted = const()[name = string("const_446_promoted"), val = fp16(-0x1p+0)]; tensor var_14633 = mul(x = var_14631_1, y = const_446_promoted)[name = string("op_14633")]; int32 var_14635 = const()[name = string("op_14635"), val = int32(-1)]; bool var_14636_interleave_0 = const()[name = string("op_14636_interleave_0"), val = bool(false)]; tensor var_14636 = concat(axis = var_14635, interleave = var_14636_interleave_0, values = (var_14633, var_14631_0))[name = string("op_14636")]; tensor var_14637 = mul(x = var_14636, y = sin_1)[name = string("op_14637")]; tensor q_173 = add(x = var_14630, y = var_14637)[name = string("q_173")]; bool var_14661_transpose_x_0 = const()[name = string("op_14661_transpose_x_0"), val = bool(false)]; bool var_14661_transpose_y_0 = const()[name = string("op_14661_transpose_y_0"), val = bool(false)]; tensor var_14661_cast_fp16 = matmul(transpose_x = var_14661_transpose_x_0, transpose_y = var_14661_transpose_y_0, x = q_173, y = transpose_153_cast_fp16)[name = string("op_14661_cast_fp16")]; tensor attn_weights_141_cast_fp16 = add(x = var_14661_cast_fp16, y = causal_mask)[name = string("attn_weights_141_cast_fp16")]; int32 var_14671 = const()[name = string("op_14671"), val = int32(-1)]; tensor var_14673_cast_fp16 = softmax(axis = var_14671, x = attn_weights_141_cast_fp16)[name = string("op_14673_cast_fp16")]; bool var_14689_transpose_x_0 = const()[name = string("op_14689_transpose_x_0"), val = bool(false)]; bool var_14689_transpose_y_0 = const()[name = string("op_14689_transpose_y_0"), val = bool(false)]; tensor var_14689_cast_fp16 = matmul(transpose_x = var_14689_transpose_x_0, transpose_y = var_14689_transpose_y_0, x = var_14673_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_14689_cast_fp16")]; tensor var_14699 = const()[name = string("op_14699"), val = tensor([0, 2, 1, 3])]; tensor var_14706 = const()[name = string("op_14706"), val = tensor([1, 1, -1])]; tensor var_14700 = transpose(perm = var_14699, x = var_14689_cast_fp16)[name = string("transpose_83")]; tensor attn_output_141 = reshape(shape = var_14706, x = var_14700)[name = string("attn_output_141")]; tensor var_14711 = const()[name = string("op_14711"), val = tensor([0, 2, 1])]; tensor squeeze_23_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2293232960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2294805888))))[name = string("squeeze_23_palettized")]; string var_14727_pad_type_0 = const()[name = string("op_14727_pad_type_0"), val = string("valid")]; int32 var_14727_groups_0 = const()[name = string("op_14727_groups_0"), val = int32(1)]; tensor var_14727_strides_0 = const()[name = string("op_14727_strides_0"), val = tensor([1])]; tensor var_14727_pad_0 = const()[name = string("op_14727_pad_0"), val = tensor([0, 0])]; tensor var_14727_dilations_0 = const()[name = string("op_14727_dilations_0"), val = tensor([1])]; tensor var_14712 = transpose(perm = var_14711, x = attn_output_141)[name = string("transpose_82")]; tensor var_14727 = conv(dilations = var_14727_dilations_0, groups = var_14727_groups_0, pad = var_14727_pad_0, pad_type = var_14727_pad_type_0, strides = var_14727_strides_0, weight = squeeze_23_palettized, x = var_14712)[name = string("op_14727")]; tensor var_14731 = const()[name = string("op_14731"), val = tensor([0, 2, 1])]; int32 var_14737 = const()[name = string("op_14737"), val = int32(-1)]; fp16 const_447_promoted_to_fp16 = const()[name = string("const_447_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_789 = transpose(perm = var_14731, x = var_14727)[name = string("transpose_81")]; tensor var_14743_cast_fp16 = mul(x = x_789, y = const_447_promoted_to_fp16)[name = string("op_14743_cast_fp16")]; bool input_711_interleave_0 = const()[name = string("input_711_interleave_0"), val = bool(false)]; tensor input_711_cast_fp16 = concat(axis = var_14737, interleave = input_711_interleave_0, values = (x_789, var_14743_cast_fp16))[name = string("input_711_cast_fp16")]; tensor normed_761_axes_0 = const()[name = string("normed_761_axes_0"), val = tensor([-1])]; fp16 var_14735_to_fp16 = const()[name = string("op_14735_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_761_cast_fp16 = layer_norm(axes = normed_761_axes_0, epsilon = var_14735_to_fp16, x = input_711_cast_fp16)[name = string("normed_761_cast_fp16")]; tensor var_14748_split_sizes_0 = const()[name = string("op_14748_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_14748_axis_0 = const()[name = string("op_14748_axis_0"), val = int32(-1)]; tensor var_14748_cast_fp16_0, tensor var_14748_cast_fp16_1 = split(axis = var_14748_axis_0, split_sizes = var_14748_split_sizes_0, x = normed_761_cast_fp16)[name = string("op_14748_cast_fp16")]; tensor const_448_to_fp16 = const()[name = string("const_448_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2294807488)))]; tensor var_14751_cast_fp16 = mul(x = var_14748_cast_fp16_0, y = const_448_to_fp16)[name = string("op_14751_cast_fp16")]; tensor x_793_cast_fp16 = add(x = x_781_cast_fp16, y = var_14751_cast_fp16)[name = string("x_793_cast_fp16")]; int32 var_14758 = const()[name = string("op_14758"), val = int32(-1)]; fp16 const_449_promoted_to_fp16 = const()[name = string("const_449_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_14764_cast_fp16 = mul(x = x_793_cast_fp16, y = const_449_promoted_to_fp16)[name = string("op_14764_cast_fp16")]; bool input_713_interleave_0 = const()[name = string("input_713_interleave_0"), val = bool(false)]; tensor input_713_cast_fp16 = concat(axis = var_14758, interleave = input_713_interleave_0, values = (x_793_cast_fp16, var_14764_cast_fp16))[name = string("input_713_cast_fp16")]; tensor normed_765_axes_0 = const()[name = string("normed_765_axes_0"), val = tensor([-1])]; fp16 var_14756_to_fp16 = const()[name = string("op_14756_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_765_cast_fp16 = layer_norm(axes = normed_765_axes_0, epsilon = var_14756_to_fp16, x = input_713_cast_fp16)[name = string("normed_765_cast_fp16")]; tensor var_14769_split_sizes_0 = const()[name = string("op_14769_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_14769_axis_0 = const()[name = string("op_14769_axis_0"), val = int32(-1)]; tensor var_14769_cast_fp16_0, tensor var_14769_cast_fp16_1 = split(axis = var_14769_axis_0, split_sizes = var_14769_split_sizes_0, x = normed_765_cast_fp16)[name = string("op_14769_cast_fp16")]; tensor const_450_to_fp16 = const()[name = string("const_450_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2294810624)))]; tensor var_14772_cast_fp16 = mul(x = var_14769_cast_fp16_0, y = const_450_to_fp16)[name = string("op_14772_cast_fp16")]; tensor var_14785 = const()[name = string("op_14785"), val = tensor([0, 2, 1])]; tensor input_715_axes_0 = const()[name = string("input_715_axes_0"), val = tensor([2])]; tensor var_14786 = transpose(perm = var_14785, x = var_14772_cast_fp16)[name = string("transpose_80")]; tensor input_715 = expand_dims(axes = input_715_axes_0, x = var_14786)[name = string("input_715")]; string gate_93_pad_type_0 = const()[name = string("gate_93_pad_type_0"), val = string("valid")]; tensor gate_93_strides_0 = const()[name = string("gate_93_strides_0"), val = tensor([1, 1])]; tensor gate_93_pad_0 = const()[name = string("gate_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_93_dilations_0 = const()[name = string("gate_93_dilations_0"), val = tensor([1, 1])]; int32 gate_93_groups_0 = const()[name = string("gate_93_groups_0"), val = int32(1)]; tensor gate_93 = conv(dilations = gate_93_dilations_0, groups = gate_93_groups_0, pad = gate_93_pad_0, pad_type = gate_93_pad_type_0, strides = gate_93_strides_0, weight = layers_23_mlp_gate_proj_weight_palettized, x = input_715)[name = string("gate_93")]; string up_47_pad_type_0 = const()[name = string("up_47_pad_type_0"), val = string("valid")]; tensor up_47_strides_0 = const()[name = string("up_47_strides_0"), val = tensor([1, 1])]; tensor up_47_pad_0 = const()[name = string("up_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_47_dilations_0 = const()[name = string("up_47_dilations_0"), val = tensor([1, 1])]; int32 up_47_groups_0 = const()[name = string("up_47_groups_0"), val = int32(1)]; tensor up_47 = conv(dilations = up_47_dilations_0, groups = up_47_groups_0, pad = up_47_pad_0, pad_type = up_47_pad_type_0, strides = up_47_strides_0, weight = layers_23_mlp_up_proj_weight_palettized, x = input_715)[name = string("up_47")]; string gate_95_mode_0 = const()[name = string("gate_95_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_95 = gelu(mode = gate_95_mode_0, x = gate_93)[name = string("gate_95")]; tensor input_717 = mul(x = gate_95, y = up_47)[name = string("input_717")]; string mlp_out_47_pad_type_0 = const()[name = string("mlp_out_47_pad_type_0"), val = string("valid")]; tensor mlp_out_47_strides_0 = const()[name = string("mlp_out_47_strides_0"), val = tensor([1, 1])]; tensor mlp_out_47_pad_0 = const()[name = string("mlp_out_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_47_dilations_0 = const()[name = string("mlp_out_47_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_47_groups_0 = const()[name = string("mlp_out_47_groups_0"), val = int32(1)]; tensor mlp_out_47 = conv(dilations = mlp_out_47_dilations_0, groups = mlp_out_47_groups_0, pad = mlp_out_47_pad_0, pad_type = mlp_out_47_pad_type_0, strides = mlp_out_47_strides_0, weight = layers_23_mlp_down_proj_weight_palettized, x = input_717)[name = string("mlp_out_47")]; tensor var_14826_axes_0 = const()[name = string("op_14826_axes_0"), val = tensor([2])]; tensor var_14826 = squeeze(axes = var_14826_axes_0, x = mlp_out_47)[name = string("op_14826")]; tensor var_14830 = const()[name = string("op_14830"), val = tensor([0, 2, 1])]; int32 var_14836 = const()[name = string("op_14836"), val = int32(-1)]; fp16 const_451_promoted_to_fp16 = const()[name = string("const_451_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_797 = transpose(perm = var_14830, x = var_14826)[name = string("transpose_79")]; tensor var_14842_cast_fp16 = mul(x = x_797, y = const_451_promoted_to_fp16)[name = string("op_14842_cast_fp16")]; bool input_719_interleave_0 = const()[name = string("input_719_interleave_0"), val = bool(false)]; tensor input_719_cast_fp16 = concat(axis = var_14836, interleave = input_719_interleave_0, values = (x_797, var_14842_cast_fp16))[name = string("input_719_cast_fp16")]; tensor normed_769_axes_0 = const()[name = string("normed_769_axes_0"), val = tensor([-1])]; fp16 var_14834_to_fp16 = const()[name = string("op_14834_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_769_cast_fp16 = layer_norm(axes = normed_769_axes_0, epsilon = var_14834_to_fp16, x = input_719_cast_fp16)[name = string("normed_769_cast_fp16")]; tensor var_14847_split_sizes_0 = const()[name = string("op_14847_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_14847_axis_0 = const()[name = string("op_14847_axis_0"), val = int32(-1)]; tensor var_14847_cast_fp16_0, tensor var_14847_cast_fp16_1 = split(axis = var_14847_axis_0, split_sizes = var_14847_split_sizes_0, x = normed_769_cast_fp16)[name = string("op_14847_cast_fp16")]; tensor const_452_to_fp16 = const()[name = string("const_452_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2294813760)))]; tensor var_14850_cast_fp16 = mul(x = var_14847_cast_fp16_0, y = const_452_to_fp16)[name = string("op_14850_cast_fp16")]; tensor hidden_states_289_cast_fp16 = add(x = x_793_cast_fp16, y = var_14850_cast_fp16)[name = string("hidden_states_289_cast_fp16")]; tensor per_layer_slice_47_begin_0 = const()[name = string("per_layer_slice_47_begin_0"), val = tensor([0, 0, 5888])]; tensor per_layer_slice_47_end_0 = const()[name = string("per_layer_slice_47_end_0"), val = tensor([1, 1, 6144])]; tensor per_layer_slice_47_end_mask_0 = const()[name = string("per_layer_slice_47_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_47 = slice_by_index(begin = per_layer_slice_47_begin_0, end = per_layer_slice_47_end_0, end_mask = per_layer_slice_47_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_47")]; tensor gated_93 = linear(bias = linear_1_bias_0, weight = layers_23_per_layer_input_gate_weight_palettized, x = hidden_states_289_cast_fp16)[name = string("linear_47")]; string gated_95_mode_0 = const()[name = string("gated_95_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_95 = gelu(mode = gated_95_mode_0, x = gated_93)[name = string("gated_95")]; tensor input_723 = mul(x = gated_95, y = per_layer_slice_47)[name = string("input_723")]; tensor x_801 = linear(bias = linear_2_bias_0, weight = layers_23_per_layer_projection_weight_palettized, x = input_723)[name = string("linear_48")]; int32 var_14887 = const()[name = string("op_14887"), val = int32(-1)]; fp16 const_453_promoted_to_fp16 = const()[name = string("const_453_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_14893_cast_fp16 = mul(x = x_801, y = const_453_promoted_to_fp16)[name = string("op_14893_cast_fp16")]; bool input_725_interleave_0 = const()[name = string("input_725_interleave_0"), val = bool(false)]; tensor input_725_cast_fp16 = concat(axis = var_14887, interleave = input_725_interleave_0, values = (x_801, var_14893_cast_fp16))[name = string("input_725_cast_fp16")]; tensor normed_773_axes_0 = const()[name = string("normed_773_axes_0"), val = tensor([-1])]; fp16 var_14885_to_fp16 = const()[name = string("op_14885_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_773_cast_fp16 = layer_norm(axes = normed_773_axes_0, epsilon = var_14885_to_fp16, x = input_725_cast_fp16)[name = string("normed_773_cast_fp16")]; tensor var_14898_split_sizes_0 = const()[name = string("op_14898_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_14898_axis_0 = const()[name = string("op_14898_axis_0"), val = int32(-1)]; tensor var_14898_cast_fp16_0, tensor var_14898_cast_fp16_1 = split(axis = var_14898_axis_0, split_sizes = var_14898_split_sizes_0, x = normed_773_cast_fp16)[name = string("op_14898_cast_fp16")]; tensor const_454_to_fp16 = const()[name = string("const_454_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2294816896)))]; tensor var_14901_cast_fp16 = mul(x = var_14898_cast_fp16_0, y = const_454_to_fp16)[name = string("op_14901_cast_fp16")]; tensor hidden_states_293_cast_fp16 = add(x = hidden_states_289_cast_fp16, y = var_14901_cast_fp16)[name = string("hidden_states_293_cast_fp16")]; tensor layers_23_layer_scalar_to_fp16 = const()[name = string("layers_23_layer_scalar_to_fp16"), val = tensor([0x1.bap-2])]; tensor x_805_cast_fp16 = mul(x = hidden_states_293_cast_fp16, y = layers_23_layer_scalar_to_fp16)[name = string("x_805_cast_fp16")]; int32 var_14909 = const()[name = string("op_14909"), val = int32(-1)]; fp16 const_455_promoted_to_fp16 = const()[name = string("const_455_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_14915_cast_fp16 = mul(x = x_805_cast_fp16, y = const_455_promoted_to_fp16)[name = string("op_14915_cast_fp16")]; bool input_727_interleave_0 = const()[name = string("input_727_interleave_0"), val = bool(false)]; tensor input_727_cast_fp16 = concat(axis = var_14909, interleave = input_727_interleave_0, values = (x_805_cast_fp16, var_14915_cast_fp16))[name = string("input_727_cast_fp16")]; tensor normed_777_axes_0 = const()[name = string("normed_777_axes_0"), val = tensor([-1])]; fp16 var_14907_to_fp16 = const()[name = string("op_14907_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_777_cast_fp16 = layer_norm(axes = normed_777_axes_0, epsilon = var_14907_to_fp16, x = input_727_cast_fp16)[name = string("normed_777_cast_fp16")]; tensor var_14920_split_sizes_0 = const()[name = string("op_14920_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_14920_axis_0 = const()[name = string("op_14920_axis_0"), val = int32(-1)]; tensor var_14920_cast_fp16_0, tensor var_14920_cast_fp16_1 = split(axis = var_14920_axis_0, split_sizes = var_14920_split_sizes_0, x = normed_777_cast_fp16)[name = string("op_14920_cast_fp16")]; tensor const_456_to_fp16 = const()[name = string("const_456_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2294820032)))]; tensor var_14923_cast_fp16 = mul(x = var_14920_cast_fp16_0, y = const_456_to_fp16)[name = string("op_14923_cast_fp16")]; tensor var_14931 = const()[name = string("op_14931"), val = tensor([0, 2, 1])]; tensor var_14934_axes_0 = const()[name = string("op_14934_axes_0"), val = tensor([2])]; tensor var_14932_cast_fp16 = transpose(perm = var_14931, x = var_14923_cast_fp16)[name = string("transpose_78")]; tensor var_14934_cast_fp16 = expand_dims(axes = var_14934_axes_0, x = var_14932_cast_fp16)[name = string("op_14934_cast_fp16")]; string var_14950_pad_type_0 = const()[name = string("op_14950_pad_type_0"), val = string("valid")]; tensor var_14950_strides_0 = const()[name = string("op_14950_strides_0"), val = tensor([1, 1])]; tensor var_14950_pad_0 = const()[name = string("op_14950_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14950_dilations_0 = const()[name = string("op_14950_dilations_0"), val = tensor([1, 1])]; int32 var_14950_groups_0 = const()[name = string("op_14950_groups_0"), val = int32(1)]; tensor var_14950 = conv(dilations = var_14950_dilations_0, groups = var_14950_groups_0, pad = var_14950_pad_0, pad_type = var_14950_pad_type_0, strides = var_14950_strides_0, weight = layers_24_self_attn_q_proj_weight_palettized, x = var_14934_cast_fp16)[name = string("op_14950")]; tensor var_14955 = const()[name = string("op_14955"), val = tensor([1, 8, 512, 1])]; tensor var_14956 = reshape(shape = var_14955, x = var_14950)[name = string("op_14956")]; tensor var_14961 = const()[name = string("op_14961"), val = tensor([0, 1, 3, 2])]; tensor var_14971 = const()[name = string("op_14971"), val = tensor([1, 8, 512])]; tensor var_14962 = transpose(perm = var_14961, x = var_14956)[name = string("transpose_77")]; tensor x_809 = reshape(shape = var_14971, x = var_14962)[name = string("x_809")]; int32 var_14977 = const()[name = string("op_14977"), val = int32(-1)]; fp16 const_457_promoted_to_fp16 = const()[name = string("const_457_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_14983_cast_fp16 = mul(x = x_809, y = const_457_promoted_to_fp16)[name = string("op_14983_cast_fp16")]; bool input_731_interleave_0 = const()[name = string("input_731_interleave_0"), val = bool(false)]; tensor input_731_cast_fp16 = concat(axis = var_14977, interleave = input_731_interleave_0, values = (x_809, var_14983_cast_fp16))[name = string("input_731_cast_fp16")]; tensor normed_781_axes_0 = const()[name = string("normed_781_axes_0"), val = tensor([-1])]; fp16 var_14975_to_fp16 = const()[name = string("op_14975_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_781_cast_fp16 = layer_norm(axes = normed_781_axes_0, epsilon = var_14975_to_fp16, x = input_731_cast_fp16)[name = string("normed_781_cast_fp16")]; tensor var_14988_split_sizes_0 = const()[name = string("op_14988_split_sizes_0"), val = tensor([512, 512])]; int32 var_14988_axis_0 = const()[name = string("op_14988_axis_0"), val = int32(-1)]; tensor var_14988_cast_fp16_0, tensor var_14988_cast_fp16_1 = split(axis = var_14988_axis_0, split_sizes = var_14988_split_sizes_0, x = normed_781_cast_fp16)[name = string("op_14988_cast_fp16")]; tensor var_14991_cast_fp16 = mul(x = var_14988_cast_fp16_0, y = const_325_to_fp16)[name = string("op_14991_cast_fp16")]; tensor var_14997 = const()[name = string("op_14997"), val = tensor([1, 8, 1, 512])]; tensor q_177 = reshape(shape = var_14997, x = var_14991_cast_fp16)[name = string("q_177")]; tensor var_14999 = mul(x = q_177, y = cos)[name = string("op_14999")]; tensor var_15000_split_sizes_0 = const()[name = string("op_15000_split_sizes_0"), val = tensor([256, 256])]; int32 var_15000_axis_0 = const()[name = string("op_15000_axis_0"), val = int32(-1)]; tensor var_15000_0, tensor var_15000_1 = split(axis = var_15000_axis_0, split_sizes = var_15000_split_sizes_0, x = q_177)[name = string("op_15000")]; fp16 const_459_promoted = const()[name = string("const_459_promoted"), val = fp16(-0x1p+0)]; tensor var_15002 = mul(x = var_15000_1, y = const_459_promoted)[name = string("op_15002")]; int32 var_15004 = const()[name = string("op_15004"), val = int32(-1)]; bool var_15005_interleave_0 = const()[name = string("op_15005_interleave_0"), val = bool(false)]; tensor var_15005 = concat(axis = var_15004, interleave = var_15005_interleave_0, values = (var_15002, var_15000_0))[name = string("op_15005")]; tensor var_15006 = mul(x = var_15005, y = sin)[name = string("op_15006")]; tensor q_179 = add(x = var_14999, y = var_15006)[name = string("q_179")]; bool var_15030_transpose_x_0 = const()[name = string("op_15030_transpose_x_0"), val = bool(false)]; bool var_15030_transpose_y_0 = const()[name = string("op_15030_transpose_y_0"), val = bool(false)]; tensor var_15030_cast_fp16 = matmul(transpose_x = var_15030_transpose_x_0, transpose_y = var_15030_transpose_y_0, x = q_179, y = transpose_154_cast_fp16)[name = string("op_15030_cast_fp16")]; tensor attn_weights_147_cast_fp16 = add(x = var_15030_cast_fp16, y = causal_mask)[name = string("attn_weights_147_cast_fp16")]; int32 var_15040 = const()[name = string("op_15040"), val = int32(-1)]; tensor var_15042_cast_fp16 = softmax(axis = var_15040, x = attn_weights_147_cast_fp16)[name = string("op_15042_cast_fp16")]; bool var_15058_transpose_x_0 = const()[name = string("op_15058_transpose_x_0"), val = bool(false)]; bool var_15058_transpose_y_0 = const()[name = string("op_15058_transpose_y_0"), val = bool(false)]; tensor var_15058_cast_fp16 = matmul(transpose_x = var_15058_transpose_x_0, transpose_y = var_15058_transpose_y_0, x = var_15042_cast_fp16, y = V_expanded_29_cast_fp16)[name = string("op_15058_cast_fp16")]; tensor var_15068 = const()[name = string("op_15068"), val = tensor([0, 2, 1, 3])]; tensor var_15075 = const()[name = string("op_15075"), val = tensor([1, 1, -1])]; tensor var_15069 = transpose(perm = var_15068, x = var_15058_cast_fp16)[name = string("transpose_76")]; tensor attn_output_147 = reshape(shape = var_15075, x = var_15069)[name = string("attn_output_147")]; tensor var_15080 = const()[name = string("op_15080"), val = tensor([0, 2, 1])]; tensor squeeze_24_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2294823168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2297968960))))[name = string("squeeze_24_palettized")]; string var_15096_pad_type_0 = const()[name = string("op_15096_pad_type_0"), val = string("valid")]; int32 var_15096_groups_0 = const()[name = string("op_15096_groups_0"), val = int32(1)]; tensor var_15096_strides_0 = const()[name = string("op_15096_strides_0"), val = tensor([1])]; tensor var_15096_pad_0 = const()[name = string("op_15096_pad_0"), val = tensor([0, 0])]; tensor var_15096_dilations_0 = const()[name = string("op_15096_dilations_0"), val = tensor([1])]; tensor var_15081 = transpose(perm = var_15080, x = attn_output_147)[name = string("transpose_75")]; tensor var_15096 = conv(dilations = var_15096_dilations_0, groups = var_15096_groups_0, pad = var_15096_pad_0, pad_type = var_15096_pad_type_0, strides = var_15096_strides_0, weight = squeeze_24_palettized, x = var_15081)[name = string("op_15096")]; tensor var_15100 = const()[name = string("op_15100"), val = tensor([0, 2, 1])]; int32 var_15106 = const()[name = string("op_15106"), val = int32(-1)]; fp16 const_460_promoted_to_fp16 = const()[name = string("const_460_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_813 = transpose(perm = var_15100, x = var_15096)[name = string("transpose_74")]; tensor var_15112_cast_fp16 = mul(x = x_813, y = const_460_promoted_to_fp16)[name = string("op_15112_cast_fp16")]; bool input_735_interleave_0 = const()[name = string("input_735_interleave_0"), val = bool(false)]; tensor input_735_cast_fp16 = concat(axis = var_15106, interleave = input_735_interleave_0, values = (x_813, var_15112_cast_fp16))[name = string("input_735_cast_fp16")]; tensor normed_785_axes_0 = const()[name = string("normed_785_axes_0"), val = tensor([-1])]; fp16 var_15104_to_fp16 = const()[name = string("op_15104_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_785_cast_fp16 = layer_norm(axes = normed_785_axes_0, epsilon = var_15104_to_fp16, x = input_735_cast_fp16)[name = string("normed_785_cast_fp16")]; tensor var_15117_split_sizes_0 = const()[name = string("op_15117_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_15117_axis_0 = const()[name = string("op_15117_axis_0"), val = int32(-1)]; tensor var_15117_cast_fp16_0, tensor var_15117_cast_fp16_1 = split(axis = var_15117_axis_0, split_sizes = var_15117_split_sizes_0, x = normed_785_cast_fp16)[name = string("op_15117_cast_fp16")]; tensor const_461_to_fp16 = const()[name = string("const_461_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2297970560)))]; tensor var_15120_cast_fp16 = mul(x = var_15117_cast_fp16_0, y = const_461_to_fp16)[name = string("op_15120_cast_fp16")]; tensor x_817_cast_fp16 = add(x = x_805_cast_fp16, y = var_15120_cast_fp16)[name = string("x_817_cast_fp16")]; int32 var_15127 = const()[name = string("op_15127"), val = int32(-1)]; fp16 const_462_promoted_to_fp16 = const()[name = string("const_462_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_15133_cast_fp16 = mul(x = x_817_cast_fp16, y = const_462_promoted_to_fp16)[name = string("op_15133_cast_fp16")]; bool input_737_interleave_0 = const()[name = string("input_737_interleave_0"), val = bool(false)]; tensor input_737_cast_fp16 = concat(axis = var_15127, interleave = input_737_interleave_0, values = (x_817_cast_fp16, var_15133_cast_fp16))[name = string("input_737_cast_fp16")]; tensor normed_789_axes_0 = const()[name = string("normed_789_axes_0"), val = tensor([-1])]; fp16 var_15125_to_fp16 = const()[name = string("op_15125_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_789_cast_fp16 = layer_norm(axes = normed_789_axes_0, epsilon = var_15125_to_fp16, x = input_737_cast_fp16)[name = string("normed_789_cast_fp16")]; tensor var_15138_split_sizes_0 = const()[name = string("op_15138_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_15138_axis_0 = const()[name = string("op_15138_axis_0"), val = int32(-1)]; tensor var_15138_cast_fp16_0, tensor var_15138_cast_fp16_1 = split(axis = var_15138_axis_0, split_sizes = var_15138_split_sizes_0, x = normed_789_cast_fp16)[name = string("op_15138_cast_fp16")]; tensor const_463_to_fp16 = const()[name = string("const_463_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2297973696)))]; tensor var_15141_cast_fp16 = mul(x = var_15138_cast_fp16_0, y = const_463_to_fp16)[name = string("op_15141_cast_fp16")]; tensor var_15154 = const()[name = string("op_15154"), val = tensor([0, 2, 1])]; tensor input_739_axes_0 = const()[name = string("input_739_axes_0"), val = tensor([2])]; tensor var_15155 = transpose(perm = var_15154, x = var_15141_cast_fp16)[name = string("transpose_73")]; tensor input_739 = expand_dims(axes = input_739_axes_0, x = var_15155)[name = string("input_739")]; string gate_97_pad_type_0 = const()[name = string("gate_97_pad_type_0"), val = string("valid")]; tensor gate_97_strides_0 = const()[name = string("gate_97_strides_0"), val = tensor([1, 1])]; tensor gate_97_pad_0 = const()[name = string("gate_97_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_97_dilations_0 = const()[name = string("gate_97_dilations_0"), val = tensor([1, 1])]; int32 gate_97_groups_0 = const()[name = string("gate_97_groups_0"), val = int32(1)]; tensor gate_97 = conv(dilations = gate_97_dilations_0, groups = gate_97_groups_0, pad = gate_97_pad_0, pad_type = gate_97_pad_type_0, strides = gate_97_strides_0, weight = layers_24_mlp_gate_proj_weight_palettized, x = input_739)[name = string("gate_97")]; string up_49_pad_type_0 = const()[name = string("up_49_pad_type_0"), val = string("valid")]; tensor up_49_strides_0 = const()[name = string("up_49_strides_0"), val = tensor([1, 1])]; tensor up_49_pad_0 = const()[name = string("up_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_49_dilations_0 = const()[name = string("up_49_dilations_0"), val = tensor([1, 1])]; int32 up_49_groups_0 = const()[name = string("up_49_groups_0"), val = int32(1)]; tensor up_49 = conv(dilations = up_49_dilations_0, groups = up_49_groups_0, pad = up_49_pad_0, pad_type = up_49_pad_type_0, strides = up_49_strides_0, weight = layers_24_mlp_up_proj_weight_palettized, x = input_739)[name = string("up_49")]; string gate_99_mode_0 = const()[name = string("gate_99_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_99 = gelu(mode = gate_99_mode_0, x = gate_97)[name = string("gate_99")]; tensor input_741 = mul(x = gate_99, y = up_49)[name = string("input_741")]; string mlp_out_49_pad_type_0 = const()[name = string("mlp_out_49_pad_type_0"), val = string("valid")]; tensor mlp_out_49_strides_0 = const()[name = string("mlp_out_49_strides_0"), val = tensor([1, 1])]; tensor mlp_out_49_pad_0 = const()[name = string("mlp_out_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_49_dilations_0 = const()[name = string("mlp_out_49_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_49_groups_0 = const()[name = string("mlp_out_49_groups_0"), val = int32(1)]; tensor mlp_out_49 = conv(dilations = mlp_out_49_dilations_0, groups = mlp_out_49_groups_0, pad = mlp_out_49_pad_0, pad_type = mlp_out_49_pad_type_0, strides = mlp_out_49_strides_0, weight = layers_24_mlp_down_proj_weight_palettized, x = input_741)[name = string("mlp_out_49")]; tensor var_15195_axes_0 = const()[name = string("op_15195_axes_0"), val = tensor([2])]; tensor var_15195 = squeeze(axes = var_15195_axes_0, x = mlp_out_49)[name = string("op_15195")]; tensor var_15199 = const()[name = string("op_15199"), val = tensor([0, 2, 1])]; int32 var_15205 = const()[name = string("op_15205"), val = int32(-1)]; fp16 const_464_promoted_to_fp16 = const()[name = string("const_464_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_821 = transpose(perm = var_15199, x = var_15195)[name = string("transpose_72")]; tensor var_15211_cast_fp16 = mul(x = x_821, y = const_464_promoted_to_fp16)[name = string("op_15211_cast_fp16")]; bool input_743_interleave_0 = const()[name = string("input_743_interleave_0"), val = bool(false)]; tensor input_743_cast_fp16 = concat(axis = var_15205, interleave = input_743_interleave_0, values = (x_821, var_15211_cast_fp16))[name = string("input_743_cast_fp16")]; tensor normed_793_axes_0 = const()[name = string("normed_793_axes_0"), val = tensor([-1])]; fp16 var_15203_to_fp16 = const()[name = string("op_15203_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_793_cast_fp16 = layer_norm(axes = normed_793_axes_0, epsilon = var_15203_to_fp16, x = input_743_cast_fp16)[name = string("normed_793_cast_fp16")]; tensor var_15216_split_sizes_0 = const()[name = string("op_15216_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_15216_axis_0 = const()[name = string("op_15216_axis_0"), val = int32(-1)]; tensor var_15216_cast_fp16_0, tensor var_15216_cast_fp16_1 = split(axis = var_15216_axis_0, split_sizes = var_15216_split_sizes_0, x = normed_793_cast_fp16)[name = string("op_15216_cast_fp16")]; tensor const_465_to_fp16 = const()[name = string("const_465_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2297976832)))]; tensor var_15219_cast_fp16 = mul(x = var_15216_cast_fp16_0, y = const_465_to_fp16)[name = string("op_15219_cast_fp16")]; tensor hidden_states_301_cast_fp16 = add(x = x_817_cast_fp16, y = var_15219_cast_fp16)[name = string("hidden_states_301_cast_fp16")]; tensor per_layer_slice_49_begin_0 = const()[name = string("per_layer_slice_49_begin_0"), val = tensor([0, 0, 6144])]; tensor per_layer_slice_49_end_0 = const()[name = string("per_layer_slice_49_end_0"), val = tensor([1, 1, 6400])]; tensor per_layer_slice_49_end_mask_0 = const()[name = string("per_layer_slice_49_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_49 = slice_by_index(begin = per_layer_slice_49_begin_0, end = per_layer_slice_49_end_0, end_mask = per_layer_slice_49_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_49")]; tensor gated_97 = linear(bias = linear_1_bias_0, weight = layers_24_per_layer_input_gate_weight_palettized, x = hidden_states_301_cast_fp16)[name = string("linear_49")]; string gated_99_mode_0 = const()[name = string("gated_99_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_99 = gelu(mode = gated_99_mode_0, x = gated_97)[name = string("gated_99")]; tensor input_747 = mul(x = gated_99, y = per_layer_slice_49)[name = string("input_747")]; tensor x_825 = linear(bias = linear_2_bias_0, weight = layers_24_per_layer_projection_weight_palettized, x = input_747)[name = string("linear_50")]; int32 var_15256 = const()[name = string("op_15256"), val = int32(-1)]; fp16 const_466_promoted_to_fp16 = const()[name = string("const_466_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_15262_cast_fp16 = mul(x = x_825, y = const_466_promoted_to_fp16)[name = string("op_15262_cast_fp16")]; bool input_749_interleave_0 = const()[name = string("input_749_interleave_0"), val = bool(false)]; tensor input_749_cast_fp16 = concat(axis = var_15256, interleave = input_749_interleave_0, values = (x_825, var_15262_cast_fp16))[name = string("input_749_cast_fp16")]; tensor normed_797_axes_0 = const()[name = string("normed_797_axes_0"), val = tensor([-1])]; fp16 var_15254_to_fp16 = const()[name = string("op_15254_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_797_cast_fp16 = layer_norm(axes = normed_797_axes_0, epsilon = var_15254_to_fp16, x = input_749_cast_fp16)[name = string("normed_797_cast_fp16")]; tensor var_15267_split_sizes_0 = const()[name = string("op_15267_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_15267_axis_0 = const()[name = string("op_15267_axis_0"), val = int32(-1)]; tensor var_15267_cast_fp16_0, tensor var_15267_cast_fp16_1 = split(axis = var_15267_axis_0, split_sizes = var_15267_split_sizes_0, x = normed_797_cast_fp16)[name = string("op_15267_cast_fp16")]; tensor const_467_to_fp16 = const()[name = string("const_467_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2297979968)))]; tensor var_15270_cast_fp16 = mul(x = var_15267_cast_fp16_0, y = const_467_to_fp16)[name = string("op_15270_cast_fp16")]; tensor hidden_states_305_cast_fp16 = add(x = hidden_states_301_cast_fp16, y = var_15270_cast_fp16)[name = string("hidden_states_305_cast_fp16")]; tensor layers_24_layer_scalar_to_fp16 = const()[name = string("layers_24_layer_scalar_to_fp16"), val = tensor([0x1.cp-2])]; tensor x_829_cast_fp16 = mul(x = hidden_states_305_cast_fp16, y = layers_24_layer_scalar_to_fp16)[name = string("x_829_cast_fp16")]; int32 var_15278 = const()[name = string("op_15278"), val = int32(-1)]; fp16 const_468_promoted_to_fp16 = const()[name = string("const_468_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_15284_cast_fp16 = mul(x = x_829_cast_fp16, y = const_468_promoted_to_fp16)[name = string("op_15284_cast_fp16")]; bool input_751_interleave_0 = const()[name = string("input_751_interleave_0"), val = bool(false)]; tensor input_751_cast_fp16 = concat(axis = var_15278, interleave = input_751_interleave_0, values = (x_829_cast_fp16, var_15284_cast_fp16))[name = string("input_751_cast_fp16")]; tensor normed_801_axes_0 = const()[name = string("normed_801_axes_0"), val = tensor([-1])]; fp16 var_15276_to_fp16 = const()[name = string("op_15276_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_801_cast_fp16 = layer_norm(axes = normed_801_axes_0, epsilon = var_15276_to_fp16, x = input_751_cast_fp16)[name = string("normed_801_cast_fp16")]; tensor var_15289_split_sizes_0 = const()[name = string("op_15289_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_15289_axis_0 = const()[name = string("op_15289_axis_0"), val = int32(-1)]; tensor var_15289_cast_fp16_0, tensor var_15289_cast_fp16_1 = split(axis = var_15289_axis_0, split_sizes = var_15289_split_sizes_0, x = normed_801_cast_fp16)[name = string("op_15289_cast_fp16")]; tensor const_469_to_fp16 = const()[name = string("const_469_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2297983104)))]; tensor var_15292_cast_fp16 = mul(x = var_15289_cast_fp16_0, y = const_469_to_fp16)[name = string("op_15292_cast_fp16")]; tensor var_15300 = const()[name = string("op_15300"), val = tensor([0, 2, 1])]; tensor var_15303_axes_0 = const()[name = string("op_15303_axes_0"), val = tensor([2])]; tensor var_15301_cast_fp16 = transpose(perm = var_15300, x = var_15292_cast_fp16)[name = string("transpose_71")]; tensor var_15303_cast_fp16 = expand_dims(axes = var_15303_axes_0, x = var_15301_cast_fp16)[name = string("op_15303_cast_fp16")]; string var_15319_pad_type_0 = const()[name = string("op_15319_pad_type_0"), val = string("valid")]; tensor var_15319_strides_0 = const()[name = string("op_15319_strides_0"), val = tensor([1, 1])]; tensor var_15319_pad_0 = const()[name = string("op_15319_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_15319_dilations_0 = const()[name = string("op_15319_dilations_0"), val = tensor([1, 1])]; int32 var_15319_groups_0 = const()[name = string("op_15319_groups_0"), val = int32(1)]; tensor var_15319 = conv(dilations = var_15319_dilations_0, groups = var_15319_groups_0, pad = var_15319_pad_0, pad_type = var_15319_pad_type_0, strides = var_15319_strides_0, weight = layers_25_self_attn_q_proj_weight_palettized, x = var_15303_cast_fp16)[name = string("op_15319")]; tensor var_15324 = const()[name = string("op_15324"), val = tensor([1, 8, 256, 1])]; tensor var_15325 = reshape(shape = var_15324, x = var_15319)[name = string("op_15325")]; tensor var_15330 = const()[name = string("op_15330"), val = tensor([0, 1, 3, 2])]; tensor var_15340 = const()[name = string("op_15340"), val = tensor([1, 8, 256])]; tensor var_15331 = transpose(perm = var_15330, x = var_15325)[name = string("transpose_70")]; tensor x_833 = reshape(shape = var_15340, x = var_15331)[name = string("x_833")]; int32 var_15346 = const()[name = string("op_15346"), val = int32(-1)]; fp16 const_470_promoted_to_fp16 = const()[name = string("const_470_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_15352_cast_fp16 = mul(x = x_833, y = const_470_promoted_to_fp16)[name = string("op_15352_cast_fp16")]; bool input_755_interleave_0 = const()[name = string("input_755_interleave_0"), val = bool(false)]; tensor input_755_cast_fp16 = concat(axis = var_15346, interleave = input_755_interleave_0, values = (x_833, var_15352_cast_fp16))[name = string("input_755_cast_fp16")]; tensor normed_805_axes_0 = const()[name = string("normed_805_axes_0"), val = tensor([-1])]; fp16 var_15344_to_fp16 = const()[name = string("op_15344_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_805_cast_fp16 = layer_norm(axes = normed_805_axes_0, epsilon = var_15344_to_fp16, x = input_755_cast_fp16)[name = string("normed_805_cast_fp16")]; tensor var_15357_split_sizes_0 = const()[name = string("op_15357_split_sizes_0"), val = tensor([256, 256])]; int32 var_15357_axis_0 = const()[name = string("op_15357_axis_0"), val = int32(-1)]; tensor var_15357_cast_fp16_0, tensor var_15357_cast_fp16_1 = split(axis = var_15357_axis_0, split_sizes = var_15357_split_sizes_0, x = normed_805_cast_fp16)[name = string("op_15357_cast_fp16")]; tensor var_15360_cast_fp16 = mul(x = var_15357_cast_fp16_0, y = const_307_to_fp16)[name = string("op_15360_cast_fp16")]; tensor var_15366 = const()[name = string("op_15366"), val = tensor([1, 8, 1, 256])]; tensor q_183 = reshape(shape = var_15366, x = var_15360_cast_fp16)[name = string("q_183")]; tensor var_15368 = mul(x = q_183, y = cos_1)[name = string("op_15368")]; tensor var_15369_split_sizes_0 = const()[name = string("op_15369_split_sizes_0"), val = tensor([128, 128])]; int32 var_15369_axis_0 = const()[name = string("op_15369_axis_0"), val = int32(-1)]; tensor var_15369_0, tensor var_15369_1 = split(axis = var_15369_axis_0, split_sizes = var_15369_split_sizes_0, x = q_183)[name = string("op_15369")]; fp16 const_472_promoted = const()[name = string("const_472_promoted"), val = fp16(-0x1p+0)]; tensor var_15371 = mul(x = var_15369_1, y = const_472_promoted)[name = string("op_15371")]; int32 var_15373 = const()[name = string("op_15373"), val = int32(-1)]; bool var_15374_interleave_0 = const()[name = string("op_15374_interleave_0"), val = bool(false)]; tensor var_15374 = concat(axis = var_15373, interleave = var_15374_interleave_0, values = (var_15371, var_15369_0))[name = string("op_15374")]; tensor var_15375 = mul(x = var_15374, y = sin_1)[name = string("op_15375")]; tensor q_185 = add(x = var_15368, y = var_15375)[name = string("q_185")]; bool var_15399_transpose_x_0 = const()[name = string("op_15399_transpose_x_0"), val = bool(false)]; bool var_15399_transpose_y_0 = const()[name = string("op_15399_transpose_y_0"), val = bool(false)]; tensor var_15399_cast_fp16 = matmul(transpose_x = var_15399_transpose_x_0, transpose_y = var_15399_transpose_y_0, x = q_185, y = transpose_153_cast_fp16)[name = string("op_15399_cast_fp16")]; tensor attn_weights_153_cast_fp16 = add(x = var_15399_cast_fp16, y = causal_mask)[name = string("attn_weights_153_cast_fp16")]; int32 var_15409 = const()[name = string("op_15409"), val = int32(-1)]; tensor var_15411_cast_fp16 = softmax(axis = var_15409, x = attn_weights_153_cast_fp16)[name = string("op_15411_cast_fp16")]; bool var_15427_transpose_x_0 = const()[name = string("op_15427_transpose_x_0"), val = bool(false)]; bool var_15427_transpose_y_0 = const()[name = string("op_15427_transpose_y_0"), val = bool(false)]; tensor var_15427_cast_fp16 = matmul(transpose_x = var_15427_transpose_x_0, transpose_y = var_15427_transpose_y_0, x = var_15411_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_15427_cast_fp16")]; tensor var_15437 = const()[name = string("op_15437"), val = tensor([0, 2, 1, 3])]; tensor var_15444 = const()[name = string("op_15444"), val = tensor([1, 1, -1])]; tensor var_15438 = transpose(perm = var_15437, x = var_15427_cast_fp16)[name = string("transpose_69")]; tensor attn_output_153 = reshape(shape = var_15444, x = var_15438)[name = string("attn_output_153")]; tensor var_15449 = const()[name = string("op_15449"), val = tensor([0, 2, 1])]; tensor squeeze_25_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2297986240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2299559168))))[name = string("squeeze_25_palettized")]; string var_15465_pad_type_0 = const()[name = string("op_15465_pad_type_0"), val = string("valid")]; int32 var_15465_groups_0 = const()[name = string("op_15465_groups_0"), val = int32(1)]; tensor var_15465_strides_0 = const()[name = string("op_15465_strides_0"), val = tensor([1])]; tensor var_15465_pad_0 = const()[name = string("op_15465_pad_0"), val = tensor([0, 0])]; tensor var_15465_dilations_0 = const()[name = string("op_15465_dilations_0"), val = tensor([1])]; tensor var_15450 = transpose(perm = var_15449, x = attn_output_153)[name = string("transpose_68")]; tensor var_15465 = conv(dilations = var_15465_dilations_0, groups = var_15465_groups_0, pad = var_15465_pad_0, pad_type = var_15465_pad_type_0, strides = var_15465_strides_0, weight = squeeze_25_palettized, x = var_15450)[name = string("op_15465")]; tensor var_15469 = const()[name = string("op_15469"), val = tensor([0, 2, 1])]; int32 var_15475 = const()[name = string("op_15475"), val = int32(-1)]; fp16 const_473_promoted_to_fp16 = const()[name = string("const_473_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_837 = transpose(perm = var_15469, x = var_15465)[name = string("transpose_67")]; tensor var_15481_cast_fp16 = mul(x = x_837, y = const_473_promoted_to_fp16)[name = string("op_15481_cast_fp16")]; bool input_759_interleave_0 = const()[name = string("input_759_interleave_0"), val = bool(false)]; tensor input_759_cast_fp16 = concat(axis = var_15475, interleave = input_759_interleave_0, values = (x_837, var_15481_cast_fp16))[name = string("input_759_cast_fp16")]; tensor normed_809_axes_0 = const()[name = string("normed_809_axes_0"), val = tensor([-1])]; fp16 var_15473_to_fp16 = const()[name = string("op_15473_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_809_cast_fp16 = layer_norm(axes = normed_809_axes_0, epsilon = var_15473_to_fp16, x = input_759_cast_fp16)[name = string("normed_809_cast_fp16")]; tensor var_15486_split_sizes_0 = const()[name = string("op_15486_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_15486_axis_0 = const()[name = string("op_15486_axis_0"), val = int32(-1)]; tensor var_15486_cast_fp16_0, tensor var_15486_cast_fp16_1 = split(axis = var_15486_axis_0, split_sizes = var_15486_split_sizes_0, x = normed_809_cast_fp16)[name = string("op_15486_cast_fp16")]; tensor const_474_to_fp16 = const()[name = string("const_474_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2299560768)))]; tensor var_15489_cast_fp16 = mul(x = var_15486_cast_fp16_0, y = const_474_to_fp16)[name = string("op_15489_cast_fp16")]; tensor x_841_cast_fp16 = add(x = x_829_cast_fp16, y = var_15489_cast_fp16)[name = string("x_841_cast_fp16")]; int32 var_15496 = const()[name = string("op_15496"), val = int32(-1)]; fp16 const_475_promoted_to_fp16 = const()[name = string("const_475_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_15502_cast_fp16 = mul(x = x_841_cast_fp16, y = const_475_promoted_to_fp16)[name = string("op_15502_cast_fp16")]; bool input_761_interleave_0 = const()[name = string("input_761_interleave_0"), val = bool(false)]; tensor input_761_cast_fp16 = concat(axis = var_15496, interleave = input_761_interleave_0, values = (x_841_cast_fp16, var_15502_cast_fp16))[name = string("input_761_cast_fp16")]; tensor normed_813_axes_0 = const()[name = string("normed_813_axes_0"), val = tensor([-1])]; fp16 var_15494_to_fp16 = const()[name = string("op_15494_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_813_cast_fp16 = layer_norm(axes = normed_813_axes_0, epsilon = var_15494_to_fp16, x = input_761_cast_fp16)[name = string("normed_813_cast_fp16")]; tensor var_15507_split_sizes_0 = const()[name = string("op_15507_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_15507_axis_0 = const()[name = string("op_15507_axis_0"), val = int32(-1)]; tensor var_15507_cast_fp16_0, tensor var_15507_cast_fp16_1 = split(axis = var_15507_axis_0, split_sizes = var_15507_split_sizes_0, x = normed_813_cast_fp16)[name = string("op_15507_cast_fp16")]; tensor const_476_to_fp16 = const()[name = string("const_476_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2299563904)))]; tensor var_15510_cast_fp16 = mul(x = var_15507_cast_fp16_0, y = const_476_to_fp16)[name = string("op_15510_cast_fp16")]; tensor var_15523 = const()[name = string("op_15523"), val = tensor([0, 2, 1])]; tensor input_763_axes_0 = const()[name = string("input_763_axes_0"), val = tensor([2])]; tensor var_15524 = transpose(perm = var_15523, x = var_15510_cast_fp16)[name = string("transpose_66")]; tensor input_763 = expand_dims(axes = input_763_axes_0, x = var_15524)[name = string("input_763")]; string gate_101_pad_type_0 = const()[name = string("gate_101_pad_type_0"), val = string("valid")]; tensor gate_101_strides_0 = const()[name = string("gate_101_strides_0"), val = tensor([1, 1])]; tensor gate_101_pad_0 = const()[name = string("gate_101_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_101_dilations_0 = const()[name = string("gate_101_dilations_0"), val = tensor([1, 1])]; int32 gate_101_groups_0 = const()[name = string("gate_101_groups_0"), val = int32(1)]; tensor gate_101 = conv(dilations = gate_101_dilations_0, groups = gate_101_groups_0, pad = gate_101_pad_0, pad_type = gate_101_pad_type_0, strides = gate_101_strides_0, weight = layers_25_mlp_gate_proj_weight_palettized, x = input_763)[name = string("gate_101")]; string up_51_pad_type_0 = const()[name = string("up_51_pad_type_0"), val = string("valid")]; tensor up_51_strides_0 = const()[name = string("up_51_strides_0"), val = tensor([1, 1])]; tensor up_51_pad_0 = const()[name = string("up_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_51_dilations_0 = const()[name = string("up_51_dilations_0"), val = tensor([1, 1])]; int32 up_51_groups_0 = const()[name = string("up_51_groups_0"), val = int32(1)]; tensor up_51 = conv(dilations = up_51_dilations_0, groups = up_51_groups_0, pad = up_51_pad_0, pad_type = up_51_pad_type_0, strides = up_51_strides_0, weight = layers_25_mlp_up_proj_weight_palettized, x = input_763)[name = string("up_51")]; string gate_103_mode_0 = const()[name = string("gate_103_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_103 = gelu(mode = gate_103_mode_0, x = gate_101)[name = string("gate_103")]; tensor input_765 = mul(x = gate_103, y = up_51)[name = string("input_765")]; string mlp_out_51_pad_type_0 = const()[name = string("mlp_out_51_pad_type_0"), val = string("valid")]; tensor mlp_out_51_strides_0 = const()[name = string("mlp_out_51_strides_0"), val = tensor([1, 1])]; tensor mlp_out_51_pad_0 = const()[name = string("mlp_out_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_51_dilations_0 = const()[name = string("mlp_out_51_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_51_groups_0 = const()[name = string("mlp_out_51_groups_0"), val = int32(1)]; tensor mlp_out_51 = conv(dilations = mlp_out_51_dilations_0, groups = mlp_out_51_groups_0, pad = mlp_out_51_pad_0, pad_type = mlp_out_51_pad_type_0, strides = mlp_out_51_strides_0, weight = layers_25_mlp_down_proj_weight_palettized, x = input_765)[name = string("mlp_out_51")]; tensor var_15564_axes_0 = const()[name = string("op_15564_axes_0"), val = tensor([2])]; tensor var_15564 = squeeze(axes = var_15564_axes_0, x = mlp_out_51)[name = string("op_15564")]; tensor var_15568 = const()[name = string("op_15568"), val = tensor([0, 2, 1])]; int32 var_15574 = const()[name = string("op_15574"), val = int32(-1)]; fp16 const_477_promoted_to_fp16 = const()[name = string("const_477_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_845 = transpose(perm = var_15568, x = var_15564)[name = string("transpose_65")]; tensor var_15580_cast_fp16 = mul(x = x_845, y = const_477_promoted_to_fp16)[name = string("op_15580_cast_fp16")]; bool input_767_interleave_0 = const()[name = string("input_767_interleave_0"), val = bool(false)]; tensor input_767_cast_fp16 = concat(axis = var_15574, interleave = input_767_interleave_0, values = (x_845, var_15580_cast_fp16))[name = string("input_767_cast_fp16")]; tensor normed_817_axes_0 = const()[name = string("normed_817_axes_0"), val = tensor([-1])]; fp16 var_15572_to_fp16 = const()[name = string("op_15572_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_817_cast_fp16 = layer_norm(axes = normed_817_axes_0, epsilon = var_15572_to_fp16, x = input_767_cast_fp16)[name = string("normed_817_cast_fp16")]; tensor var_15585_split_sizes_0 = const()[name = string("op_15585_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_15585_axis_0 = const()[name = string("op_15585_axis_0"), val = int32(-1)]; tensor var_15585_cast_fp16_0, tensor var_15585_cast_fp16_1 = split(axis = var_15585_axis_0, split_sizes = var_15585_split_sizes_0, x = normed_817_cast_fp16)[name = string("op_15585_cast_fp16")]; tensor const_478_to_fp16 = const()[name = string("const_478_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2299567040)))]; tensor var_15588_cast_fp16 = mul(x = var_15585_cast_fp16_0, y = const_478_to_fp16)[name = string("op_15588_cast_fp16")]; tensor hidden_states_313_cast_fp16 = add(x = x_841_cast_fp16, y = var_15588_cast_fp16)[name = string("hidden_states_313_cast_fp16")]; tensor per_layer_slice_51_begin_0 = const()[name = string("per_layer_slice_51_begin_0"), val = tensor([0, 0, 6400])]; tensor per_layer_slice_51_end_0 = const()[name = string("per_layer_slice_51_end_0"), val = tensor([1, 1, 6656])]; tensor per_layer_slice_51_end_mask_0 = const()[name = string("per_layer_slice_51_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_51 = slice_by_index(begin = per_layer_slice_51_begin_0, end = per_layer_slice_51_end_0, end_mask = per_layer_slice_51_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_51")]; tensor gated_101 = linear(bias = linear_1_bias_0, weight = layers_25_per_layer_input_gate_weight_palettized, x = hidden_states_313_cast_fp16)[name = string("linear_51")]; string gated_103_mode_0 = const()[name = string("gated_103_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_103 = gelu(mode = gated_103_mode_0, x = gated_101)[name = string("gated_103")]; tensor input_771 = mul(x = gated_103, y = per_layer_slice_51)[name = string("input_771")]; tensor x_849 = linear(bias = linear_2_bias_0, weight = layers_25_per_layer_projection_weight_palettized, x = input_771)[name = string("linear_52")]; int32 var_15625 = const()[name = string("op_15625"), val = int32(-1)]; fp16 const_479_promoted_to_fp16 = const()[name = string("const_479_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_15631_cast_fp16 = mul(x = x_849, y = const_479_promoted_to_fp16)[name = string("op_15631_cast_fp16")]; bool input_773_interleave_0 = const()[name = string("input_773_interleave_0"), val = bool(false)]; tensor input_773_cast_fp16 = concat(axis = var_15625, interleave = input_773_interleave_0, values = (x_849, var_15631_cast_fp16))[name = string("input_773_cast_fp16")]; tensor normed_821_axes_0 = const()[name = string("normed_821_axes_0"), val = tensor([-1])]; fp16 var_15623_to_fp16 = const()[name = string("op_15623_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_821_cast_fp16 = layer_norm(axes = normed_821_axes_0, epsilon = var_15623_to_fp16, x = input_773_cast_fp16)[name = string("normed_821_cast_fp16")]; tensor var_15636_split_sizes_0 = const()[name = string("op_15636_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_15636_axis_0 = const()[name = string("op_15636_axis_0"), val = int32(-1)]; tensor var_15636_cast_fp16_0, tensor var_15636_cast_fp16_1 = split(axis = var_15636_axis_0, split_sizes = var_15636_split_sizes_0, x = normed_821_cast_fp16)[name = string("op_15636_cast_fp16")]; tensor const_480_to_fp16 = const()[name = string("const_480_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2299570176)))]; tensor var_15639_cast_fp16 = mul(x = var_15636_cast_fp16_0, y = const_480_to_fp16)[name = string("op_15639_cast_fp16")]; tensor hidden_states_317_cast_fp16 = add(x = hidden_states_313_cast_fp16, y = var_15639_cast_fp16)[name = string("hidden_states_317_cast_fp16")]; tensor layers_25_layer_scalar_to_fp16 = const()[name = string("layers_25_layer_scalar_to_fp16"), val = tensor([0x1.92p-1])]; tensor x_853_cast_fp16 = mul(x = hidden_states_317_cast_fp16, y = layers_25_layer_scalar_to_fp16)[name = string("x_853_cast_fp16")]; int32 var_15647 = const()[name = string("op_15647"), val = int32(-1)]; fp16 const_481_promoted_to_fp16 = const()[name = string("const_481_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_15653_cast_fp16 = mul(x = x_853_cast_fp16, y = const_481_promoted_to_fp16)[name = string("op_15653_cast_fp16")]; bool input_775_interleave_0 = const()[name = string("input_775_interleave_0"), val = bool(false)]; tensor input_775_cast_fp16 = concat(axis = var_15647, interleave = input_775_interleave_0, values = (x_853_cast_fp16, var_15653_cast_fp16))[name = string("input_775_cast_fp16")]; tensor normed_825_axes_0 = const()[name = string("normed_825_axes_0"), val = tensor([-1])]; fp16 var_15645_to_fp16 = const()[name = string("op_15645_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_825_cast_fp16 = layer_norm(axes = normed_825_axes_0, epsilon = var_15645_to_fp16, x = input_775_cast_fp16)[name = string("normed_825_cast_fp16")]; tensor var_15658_split_sizes_0 = const()[name = string("op_15658_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_15658_axis_0 = const()[name = string("op_15658_axis_0"), val = int32(-1)]; tensor var_15658_cast_fp16_0, tensor var_15658_cast_fp16_1 = split(axis = var_15658_axis_0, split_sizes = var_15658_split_sizes_0, x = normed_825_cast_fp16)[name = string("op_15658_cast_fp16")]; tensor const_482_to_fp16 = const()[name = string("const_482_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2299573312)))]; tensor var_15661_cast_fp16 = mul(x = var_15658_cast_fp16_0, y = const_482_to_fp16)[name = string("op_15661_cast_fp16")]; tensor var_15669 = const()[name = string("op_15669"), val = tensor([0, 2, 1])]; tensor var_15672_axes_0 = const()[name = string("op_15672_axes_0"), val = tensor([2])]; tensor var_15670_cast_fp16 = transpose(perm = var_15669, x = var_15661_cast_fp16)[name = string("transpose_64")]; tensor var_15672_cast_fp16 = expand_dims(axes = var_15672_axes_0, x = var_15670_cast_fp16)[name = string("op_15672_cast_fp16")]; string var_15688_pad_type_0 = const()[name = string("op_15688_pad_type_0"), val = string("valid")]; tensor var_15688_strides_0 = const()[name = string("op_15688_strides_0"), val = tensor([1, 1])]; tensor var_15688_pad_0 = const()[name = string("op_15688_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_15688_dilations_0 = const()[name = string("op_15688_dilations_0"), val = tensor([1, 1])]; int32 var_15688_groups_0 = const()[name = string("op_15688_groups_0"), val = int32(1)]; tensor var_15688 = conv(dilations = var_15688_dilations_0, groups = var_15688_groups_0, pad = var_15688_pad_0, pad_type = var_15688_pad_type_0, strides = var_15688_strides_0, weight = layers_26_self_attn_q_proj_weight_palettized, x = var_15672_cast_fp16)[name = string("op_15688")]; tensor var_15693 = const()[name = string("op_15693"), val = tensor([1, 8, 256, 1])]; tensor var_15694 = reshape(shape = var_15693, x = var_15688)[name = string("op_15694")]; tensor var_15699 = const()[name = string("op_15699"), val = tensor([0, 1, 3, 2])]; tensor var_15709 = const()[name = string("op_15709"), val = tensor([1, 8, 256])]; tensor var_15700 = transpose(perm = var_15699, x = var_15694)[name = string("transpose_63")]; tensor x_857 = reshape(shape = var_15709, x = var_15700)[name = string("x_857")]; int32 var_15715 = const()[name = string("op_15715"), val = int32(-1)]; fp16 const_483_promoted_to_fp16 = const()[name = string("const_483_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_15721_cast_fp16 = mul(x = x_857, y = const_483_promoted_to_fp16)[name = string("op_15721_cast_fp16")]; bool input_779_interleave_0 = const()[name = string("input_779_interleave_0"), val = bool(false)]; tensor input_779_cast_fp16 = concat(axis = var_15715, interleave = input_779_interleave_0, values = (x_857, var_15721_cast_fp16))[name = string("input_779_cast_fp16")]; tensor normed_829_axes_0 = const()[name = string("normed_829_axes_0"), val = tensor([-1])]; fp16 var_15713_to_fp16 = const()[name = string("op_15713_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_829_cast_fp16 = layer_norm(axes = normed_829_axes_0, epsilon = var_15713_to_fp16, x = input_779_cast_fp16)[name = string("normed_829_cast_fp16")]; tensor var_15726_split_sizes_0 = const()[name = string("op_15726_split_sizes_0"), val = tensor([256, 256])]; int32 var_15726_axis_0 = const()[name = string("op_15726_axis_0"), val = int32(-1)]; tensor var_15726_cast_fp16_0, tensor var_15726_cast_fp16_1 = split(axis = var_15726_axis_0, split_sizes = var_15726_split_sizes_0, x = normed_829_cast_fp16)[name = string("op_15726_cast_fp16")]; tensor var_15729_cast_fp16 = mul(x = var_15726_cast_fp16_0, y = const_307_to_fp16)[name = string("op_15729_cast_fp16")]; tensor var_15735 = const()[name = string("op_15735"), val = tensor([1, 8, 1, 256])]; tensor q_189 = reshape(shape = var_15735, x = var_15729_cast_fp16)[name = string("q_189")]; tensor var_15737 = mul(x = q_189, y = cos_1)[name = string("op_15737")]; tensor var_15738_split_sizes_0 = const()[name = string("op_15738_split_sizes_0"), val = tensor([128, 128])]; int32 var_15738_axis_0 = const()[name = string("op_15738_axis_0"), val = int32(-1)]; tensor var_15738_0, tensor var_15738_1 = split(axis = var_15738_axis_0, split_sizes = var_15738_split_sizes_0, x = q_189)[name = string("op_15738")]; fp16 const_485_promoted = const()[name = string("const_485_promoted"), val = fp16(-0x1p+0)]; tensor var_15740 = mul(x = var_15738_1, y = const_485_promoted)[name = string("op_15740")]; int32 var_15742 = const()[name = string("op_15742"), val = int32(-1)]; bool var_15743_interleave_0 = const()[name = string("op_15743_interleave_0"), val = bool(false)]; tensor var_15743 = concat(axis = var_15742, interleave = var_15743_interleave_0, values = (var_15740, var_15738_0))[name = string("op_15743")]; tensor var_15744 = mul(x = var_15743, y = sin_1)[name = string("op_15744")]; tensor q_191 = add(x = var_15737, y = var_15744)[name = string("q_191")]; bool var_15768_transpose_x_0 = const()[name = string("op_15768_transpose_x_0"), val = bool(false)]; bool var_15768_transpose_y_0 = const()[name = string("op_15768_transpose_y_0"), val = bool(false)]; tensor var_15768_cast_fp16 = matmul(transpose_x = var_15768_transpose_x_0, transpose_y = var_15768_transpose_y_0, x = q_191, y = transpose_153_cast_fp16)[name = string("op_15768_cast_fp16")]; tensor attn_weights_159_cast_fp16 = add(x = var_15768_cast_fp16, y = causal_mask)[name = string("attn_weights_159_cast_fp16")]; int32 var_15778 = const()[name = string("op_15778"), val = int32(-1)]; tensor var_15780_cast_fp16 = softmax(axis = var_15778, x = attn_weights_159_cast_fp16)[name = string("op_15780_cast_fp16")]; bool var_15796_transpose_x_0 = const()[name = string("op_15796_transpose_x_0"), val = bool(false)]; bool var_15796_transpose_y_0 = const()[name = string("op_15796_transpose_y_0"), val = bool(false)]; tensor var_15796_cast_fp16 = matmul(transpose_x = var_15796_transpose_x_0, transpose_y = var_15796_transpose_y_0, x = var_15780_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_15796_cast_fp16")]; tensor var_15806 = const()[name = string("op_15806"), val = tensor([0, 2, 1, 3])]; tensor var_15813 = const()[name = string("op_15813"), val = tensor([1, 1, -1])]; tensor var_15807 = transpose(perm = var_15806, x = var_15796_cast_fp16)[name = string("transpose_62")]; tensor attn_output_159 = reshape(shape = var_15813, x = var_15807)[name = string("attn_output_159")]; tensor var_15818 = const()[name = string("op_15818"), val = tensor([0, 2, 1])]; tensor squeeze_26_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2299576448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2301149376))))[name = string("squeeze_26_palettized")]; string var_15834_pad_type_0 = const()[name = string("op_15834_pad_type_0"), val = string("valid")]; int32 var_15834_groups_0 = const()[name = string("op_15834_groups_0"), val = int32(1)]; tensor var_15834_strides_0 = const()[name = string("op_15834_strides_0"), val = tensor([1])]; tensor var_15834_pad_0 = const()[name = string("op_15834_pad_0"), val = tensor([0, 0])]; tensor var_15834_dilations_0 = const()[name = string("op_15834_dilations_0"), val = tensor([1])]; tensor var_15819 = transpose(perm = var_15818, x = attn_output_159)[name = string("transpose_61")]; tensor var_15834 = conv(dilations = var_15834_dilations_0, groups = var_15834_groups_0, pad = var_15834_pad_0, pad_type = var_15834_pad_type_0, strides = var_15834_strides_0, weight = squeeze_26_palettized, x = var_15819)[name = string("op_15834")]; tensor var_15838 = const()[name = string("op_15838"), val = tensor([0, 2, 1])]; int32 var_15844 = const()[name = string("op_15844"), val = int32(-1)]; fp16 const_486_promoted_to_fp16 = const()[name = string("const_486_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_861 = transpose(perm = var_15838, x = var_15834)[name = string("transpose_60")]; tensor var_15850_cast_fp16 = mul(x = x_861, y = const_486_promoted_to_fp16)[name = string("op_15850_cast_fp16")]; bool input_783_interleave_0 = const()[name = string("input_783_interleave_0"), val = bool(false)]; tensor input_783_cast_fp16 = concat(axis = var_15844, interleave = input_783_interleave_0, values = (x_861, var_15850_cast_fp16))[name = string("input_783_cast_fp16")]; tensor normed_833_axes_0 = const()[name = string("normed_833_axes_0"), val = tensor([-1])]; fp16 var_15842_to_fp16 = const()[name = string("op_15842_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_833_cast_fp16 = layer_norm(axes = normed_833_axes_0, epsilon = var_15842_to_fp16, x = input_783_cast_fp16)[name = string("normed_833_cast_fp16")]; tensor var_15855_split_sizes_0 = const()[name = string("op_15855_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_15855_axis_0 = const()[name = string("op_15855_axis_0"), val = int32(-1)]; tensor var_15855_cast_fp16_0, tensor var_15855_cast_fp16_1 = split(axis = var_15855_axis_0, split_sizes = var_15855_split_sizes_0, x = normed_833_cast_fp16)[name = string("op_15855_cast_fp16")]; tensor const_487_to_fp16 = const()[name = string("const_487_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2301150976)))]; tensor var_15858_cast_fp16 = mul(x = var_15855_cast_fp16_0, y = const_487_to_fp16)[name = string("op_15858_cast_fp16")]; tensor x_865_cast_fp16 = add(x = x_853_cast_fp16, y = var_15858_cast_fp16)[name = string("x_865_cast_fp16")]; int32 var_15865 = const()[name = string("op_15865"), val = int32(-1)]; fp16 const_488_promoted_to_fp16 = const()[name = string("const_488_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_15871_cast_fp16 = mul(x = x_865_cast_fp16, y = const_488_promoted_to_fp16)[name = string("op_15871_cast_fp16")]; bool input_785_interleave_0 = const()[name = string("input_785_interleave_0"), val = bool(false)]; tensor input_785_cast_fp16 = concat(axis = var_15865, interleave = input_785_interleave_0, values = (x_865_cast_fp16, var_15871_cast_fp16))[name = string("input_785_cast_fp16")]; tensor normed_837_axes_0 = const()[name = string("normed_837_axes_0"), val = tensor([-1])]; fp16 var_15863_to_fp16 = const()[name = string("op_15863_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_837_cast_fp16 = layer_norm(axes = normed_837_axes_0, epsilon = var_15863_to_fp16, x = input_785_cast_fp16)[name = string("normed_837_cast_fp16")]; tensor var_15876_split_sizes_0 = const()[name = string("op_15876_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_15876_axis_0 = const()[name = string("op_15876_axis_0"), val = int32(-1)]; tensor var_15876_cast_fp16_0, tensor var_15876_cast_fp16_1 = split(axis = var_15876_axis_0, split_sizes = var_15876_split_sizes_0, x = normed_837_cast_fp16)[name = string("op_15876_cast_fp16")]; tensor const_489_to_fp16 = const()[name = string("const_489_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2301154112)))]; tensor var_15879_cast_fp16 = mul(x = var_15876_cast_fp16_0, y = const_489_to_fp16)[name = string("op_15879_cast_fp16")]; tensor var_15892 = const()[name = string("op_15892"), val = tensor([0, 2, 1])]; tensor input_787_axes_0 = const()[name = string("input_787_axes_0"), val = tensor([2])]; tensor var_15893 = transpose(perm = var_15892, x = var_15879_cast_fp16)[name = string("transpose_59")]; tensor input_787 = expand_dims(axes = input_787_axes_0, x = var_15893)[name = string("input_787")]; string gate_105_pad_type_0 = const()[name = string("gate_105_pad_type_0"), val = string("valid")]; tensor gate_105_strides_0 = const()[name = string("gate_105_strides_0"), val = tensor([1, 1])]; tensor gate_105_pad_0 = const()[name = string("gate_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_105_dilations_0 = const()[name = string("gate_105_dilations_0"), val = tensor([1, 1])]; int32 gate_105_groups_0 = const()[name = string("gate_105_groups_0"), val = int32(1)]; tensor gate_105 = conv(dilations = gate_105_dilations_0, groups = gate_105_groups_0, pad = gate_105_pad_0, pad_type = gate_105_pad_type_0, strides = gate_105_strides_0, weight = layers_26_mlp_gate_proj_weight_palettized, x = input_787)[name = string("gate_105")]; string up_53_pad_type_0 = const()[name = string("up_53_pad_type_0"), val = string("valid")]; tensor up_53_strides_0 = const()[name = string("up_53_strides_0"), val = tensor([1, 1])]; tensor up_53_pad_0 = const()[name = string("up_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_53_dilations_0 = const()[name = string("up_53_dilations_0"), val = tensor([1, 1])]; int32 up_53_groups_0 = const()[name = string("up_53_groups_0"), val = int32(1)]; tensor up_53 = conv(dilations = up_53_dilations_0, groups = up_53_groups_0, pad = up_53_pad_0, pad_type = up_53_pad_type_0, strides = up_53_strides_0, weight = layers_26_mlp_up_proj_weight_palettized, x = input_787)[name = string("up_53")]; string gate_107_mode_0 = const()[name = string("gate_107_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_107 = gelu(mode = gate_107_mode_0, x = gate_105)[name = string("gate_107")]; tensor input_789 = mul(x = gate_107, y = up_53)[name = string("input_789")]; string mlp_out_53_pad_type_0 = const()[name = string("mlp_out_53_pad_type_0"), val = string("valid")]; tensor mlp_out_53_strides_0 = const()[name = string("mlp_out_53_strides_0"), val = tensor([1, 1])]; tensor mlp_out_53_pad_0 = const()[name = string("mlp_out_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_53_dilations_0 = const()[name = string("mlp_out_53_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_53_groups_0 = const()[name = string("mlp_out_53_groups_0"), val = int32(1)]; tensor mlp_out_53 = conv(dilations = mlp_out_53_dilations_0, groups = mlp_out_53_groups_0, pad = mlp_out_53_pad_0, pad_type = mlp_out_53_pad_type_0, strides = mlp_out_53_strides_0, weight = layers_26_mlp_down_proj_weight_palettized, x = input_789)[name = string("mlp_out_53")]; tensor var_15933_axes_0 = const()[name = string("op_15933_axes_0"), val = tensor([2])]; tensor var_15933 = squeeze(axes = var_15933_axes_0, x = mlp_out_53)[name = string("op_15933")]; tensor var_15937 = const()[name = string("op_15937"), val = tensor([0, 2, 1])]; int32 var_15943 = const()[name = string("op_15943"), val = int32(-1)]; fp16 const_490_promoted_to_fp16 = const()[name = string("const_490_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_869 = transpose(perm = var_15937, x = var_15933)[name = string("transpose_58")]; tensor var_15949_cast_fp16 = mul(x = x_869, y = const_490_promoted_to_fp16)[name = string("op_15949_cast_fp16")]; bool input_791_interleave_0 = const()[name = string("input_791_interleave_0"), val = bool(false)]; tensor input_791_cast_fp16 = concat(axis = var_15943, interleave = input_791_interleave_0, values = (x_869, var_15949_cast_fp16))[name = string("input_791_cast_fp16")]; tensor normed_841_axes_0 = const()[name = string("normed_841_axes_0"), val = tensor([-1])]; fp16 var_15941_to_fp16 = const()[name = string("op_15941_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_841_cast_fp16 = layer_norm(axes = normed_841_axes_0, epsilon = var_15941_to_fp16, x = input_791_cast_fp16)[name = string("normed_841_cast_fp16")]; tensor var_15954_split_sizes_0 = const()[name = string("op_15954_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_15954_axis_0 = const()[name = string("op_15954_axis_0"), val = int32(-1)]; tensor var_15954_cast_fp16_0, tensor var_15954_cast_fp16_1 = split(axis = var_15954_axis_0, split_sizes = var_15954_split_sizes_0, x = normed_841_cast_fp16)[name = string("op_15954_cast_fp16")]; tensor const_491_to_fp16 = const()[name = string("const_491_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2301157248)))]; tensor var_15957_cast_fp16 = mul(x = var_15954_cast_fp16_0, y = const_491_to_fp16)[name = string("op_15957_cast_fp16")]; tensor hidden_states_325_cast_fp16 = add(x = x_865_cast_fp16, y = var_15957_cast_fp16)[name = string("hidden_states_325_cast_fp16")]; tensor per_layer_slice_53_begin_0 = const()[name = string("per_layer_slice_53_begin_0"), val = tensor([0, 0, 6656])]; tensor per_layer_slice_53_end_0 = const()[name = string("per_layer_slice_53_end_0"), val = tensor([1, 1, 6912])]; tensor per_layer_slice_53_end_mask_0 = const()[name = string("per_layer_slice_53_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_53 = slice_by_index(begin = per_layer_slice_53_begin_0, end = per_layer_slice_53_end_0, end_mask = per_layer_slice_53_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_53")]; tensor gated_105 = linear(bias = linear_1_bias_0, weight = layers_26_per_layer_input_gate_weight_palettized, x = hidden_states_325_cast_fp16)[name = string("linear_53")]; string gated_107_mode_0 = const()[name = string("gated_107_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_107 = gelu(mode = gated_107_mode_0, x = gated_105)[name = string("gated_107")]; tensor input_795 = mul(x = gated_107, y = per_layer_slice_53)[name = string("input_795")]; tensor x_873 = linear(bias = linear_2_bias_0, weight = layers_26_per_layer_projection_weight_palettized, x = input_795)[name = string("linear_54")]; int32 var_15994 = const()[name = string("op_15994"), val = int32(-1)]; fp16 const_492_promoted_to_fp16 = const()[name = string("const_492_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_16000_cast_fp16 = mul(x = x_873, y = const_492_promoted_to_fp16)[name = string("op_16000_cast_fp16")]; bool input_797_interleave_0 = const()[name = string("input_797_interleave_0"), val = bool(false)]; tensor input_797_cast_fp16 = concat(axis = var_15994, interleave = input_797_interleave_0, values = (x_873, var_16000_cast_fp16))[name = string("input_797_cast_fp16")]; tensor normed_845_axes_0 = const()[name = string("normed_845_axes_0"), val = tensor([-1])]; fp16 var_15992_to_fp16 = const()[name = string("op_15992_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_845_cast_fp16 = layer_norm(axes = normed_845_axes_0, epsilon = var_15992_to_fp16, x = input_797_cast_fp16)[name = string("normed_845_cast_fp16")]; tensor var_16005_split_sizes_0 = const()[name = string("op_16005_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_16005_axis_0 = const()[name = string("op_16005_axis_0"), val = int32(-1)]; tensor var_16005_cast_fp16_0, tensor var_16005_cast_fp16_1 = split(axis = var_16005_axis_0, split_sizes = var_16005_split_sizes_0, x = normed_845_cast_fp16)[name = string("op_16005_cast_fp16")]; tensor const_493_to_fp16 = const()[name = string("const_493_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2301160384)))]; tensor var_16008_cast_fp16 = mul(x = var_16005_cast_fp16_0, y = const_493_to_fp16)[name = string("op_16008_cast_fp16")]; tensor hidden_states_329_cast_fp16 = add(x = hidden_states_325_cast_fp16, y = var_16008_cast_fp16)[name = string("hidden_states_329_cast_fp16")]; tensor layers_26_layer_scalar_to_fp16 = const()[name = string("layers_26_layer_scalar_to_fp16"), val = tensor([0x1.a6p-1])]; tensor x_877_cast_fp16 = mul(x = hidden_states_329_cast_fp16, y = layers_26_layer_scalar_to_fp16)[name = string("x_877_cast_fp16")]; int32 var_16016 = const()[name = string("op_16016"), val = int32(-1)]; fp16 const_494_promoted_to_fp16 = const()[name = string("const_494_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_16022_cast_fp16 = mul(x = x_877_cast_fp16, y = const_494_promoted_to_fp16)[name = string("op_16022_cast_fp16")]; bool input_799_interleave_0 = const()[name = string("input_799_interleave_0"), val = bool(false)]; tensor input_799_cast_fp16 = concat(axis = var_16016, interleave = input_799_interleave_0, values = (x_877_cast_fp16, var_16022_cast_fp16))[name = string("input_799_cast_fp16")]; tensor normed_849_axes_0 = const()[name = string("normed_849_axes_0"), val = tensor([-1])]; fp16 var_16014_to_fp16 = const()[name = string("op_16014_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_849_cast_fp16 = layer_norm(axes = normed_849_axes_0, epsilon = var_16014_to_fp16, x = input_799_cast_fp16)[name = string("normed_849_cast_fp16")]; tensor var_16027_split_sizes_0 = const()[name = string("op_16027_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_16027_axis_0 = const()[name = string("op_16027_axis_0"), val = int32(-1)]; tensor var_16027_cast_fp16_0, tensor var_16027_cast_fp16_1 = split(axis = var_16027_axis_0, split_sizes = var_16027_split_sizes_0, x = normed_849_cast_fp16)[name = string("op_16027_cast_fp16")]; tensor const_495_to_fp16 = const()[name = string("const_495_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2301163520)))]; tensor var_16030_cast_fp16 = mul(x = var_16027_cast_fp16_0, y = const_495_to_fp16)[name = string("op_16030_cast_fp16")]; tensor var_16038 = const()[name = string("op_16038"), val = tensor([0, 2, 1])]; tensor var_16041_axes_0 = const()[name = string("op_16041_axes_0"), val = tensor([2])]; tensor var_16039_cast_fp16 = transpose(perm = var_16038, x = var_16030_cast_fp16)[name = string("transpose_57")]; tensor var_16041_cast_fp16 = expand_dims(axes = var_16041_axes_0, x = var_16039_cast_fp16)[name = string("op_16041_cast_fp16")]; string var_16057_pad_type_0 = const()[name = string("op_16057_pad_type_0"), val = string("valid")]; tensor var_16057_strides_0 = const()[name = string("op_16057_strides_0"), val = tensor([1, 1])]; tensor var_16057_pad_0 = const()[name = string("op_16057_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_16057_dilations_0 = const()[name = string("op_16057_dilations_0"), val = tensor([1, 1])]; int32 var_16057_groups_0 = const()[name = string("op_16057_groups_0"), val = int32(1)]; tensor var_16057 = conv(dilations = var_16057_dilations_0, groups = var_16057_groups_0, pad = var_16057_pad_0, pad_type = var_16057_pad_type_0, strides = var_16057_strides_0, weight = layers_27_self_attn_q_proj_weight_palettized, x = var_16041_cast_fp16)[name = string("op_16057")]; tensor var_16062 = const()[name = string("op_16062"), val = tensor([1, 8, 256, 1])]; tensor var_16063 = reshape(shape = var_16062, x = var_16057)[name = string("op_16063")]; tensor var_16068 = const()[name = string("op_16068"), val = tensor([0, 1, 3, 2])]; tensor var_16078 = const()[name = string("op_16078"), val = tensor([1, 8, 256])]; tensor var_16069 = transpose(perm = var_16068, x = var_16063)[name = string("transpose_56")]; tensor x_881 = reshape(shape = var_16078, x = var_16069)[name = string("x_881")]; int32 var_16084 = const()[name = string("op_16084"), val = int32(-1)]; fp16 const_496_promoted_to_fp16 = const()[name = string("const_496_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_16090_cast_fp16 = mul(x = x_881, y = const_496_promoted_to_fp16)[name = string("op_16090_cast_fp16")]; bool input_803_interleave_0 = const()[name = string("input_803_interleave_0"), val = bool(false)]; tensor input_803_cast_fp16 = concat(axis = var_16084, interleave = input_803_interleave_0, values = (x_881, var_16090_cast_fp16))[name = string("input_803_cast_fp16")]; tensor normed_853_axes_0 = const()[name = string("normed_853_axes_0"), val = tensor([-1])]; fp16 var_16082_to_fp16 = const()[name = string("op_16082_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_853_cast_fp16 = layer_norm(axes = normed_853_axes_0, epsilon = var_16082_to_fp16, x = input_803_cast_fp16)[name = string("normed_853_cast_fp16")]; tensor var_16095_split_sizes_0 = const()[name = string("op_16095_split_sizes_0"), val = tensor([256, 256])]; int32 var_16095_axis_0 = const()[name = string("op_16095_axis_0"), val = int32(-1)]; tensor var_16095_cast_fp16_0, tensor var_16095_cast_fp16_1 = split(axis = var_16095_axis_0, split_sizes = var_16095_split_sizes_0, x = normed_853_cast_fp16)[name = string("op_16095_cast_fp16")]; tensor var_16098_cast_fp16 = mul(x = var_16095_cast_fp16_0, y = const_307_to_fp16)[name = string("op_16098_cast_fp16")]; tensor var_16104 = const()[name = string("op_16104"), val = tensor([1, 8, 1, 256])]; tensor q_195 = reshape(shape = var_16104, x = var_16098_cast_fp16)[name = string("q_195")]; tensor var_16106 = mul(x = q_195, y = cos_1)[name = string("op_16106")]; tensor var_16107_split_sizes_0 = const()[name = string("op_16107_split_sizes_0"), val = tensor([128, 128])]; int32 var_16107_axis_0 = const()[name = string("op_16107_axis_0"), val = int32(-1)]; tensor var_16107_0, tensor var_16107_1 = split(axis = var_16107_axis_0, split_sizes = var_16107_split_sizes_0, x = q_195)[name = string("op_16107")]; fp16 const_498_promoted = const()[name = string("const_498_promoted"), val = fp16(-0x1p+0)]; tensor var_16109 = mul(x = var_16107_1, y = const_498_promoted)[name = string("op_16109")]; int32 var_16111 = const()[name = string("op_16111"), val = int32(-1)]; bool var_16112_interleave_0 = const()[name = string("op_16112_interleave_0"), val = bool(false)]; tensor var_16112 = concat(axis = var_16111, interleave = var_16112_interleave_0, values = (var_16109, var_16107_0))[name = string("op_16112")]; tensor var_16113 = mul(x = var_16112, y = sin_1)[name = string("op_16113")]; tensor q_197 = add(x = var_16106, y = var_16113)[name = string("q_197")]; bool var_16137_transpose_x_0 = const()[name = string("op_16137_transpose_x_0"), val = bool(false)]; bool var_16137_transpose_y_0 = const()[name = string("op_16137_transpose_y_0"), val = bool(false)]; tensor var_16137_cast_fp16 = matmul(transpose_x = var_16137_transpose_x_0, transpose_y = var_16137_transpose_y_0, x = q_197, y = transpose_153_cast_fp16)[name = string("op_16137_cast_fp16")]; tensor attn_weights_165_cast_fp16 = add(x = var_16137_cast_fp16, y = causal_mask)[name = string("attn_weights_165_cast_fp16")]; int32 var_16147 = const()[name = string("op_16147"), val = int32(-1)]; tensor var_16149_cast_fp16 = softmax(axis = var_16147, x = attn_weights_165_cast_fp16)[name = string("op_16149_cast_fp16")]; bool var_16165_transpose_x_0 = const()[name = string("op_16165_transpose_x_0"), val = bool(false)]; bool var_16165_transpose_y_0 = const()[name = string("op_16165_transpose_y_0"), val = bool(false)]; tensor var_16165_cast_fp16 = matmul(transpose_x = var_16165_transpose_x_0, transpose_y = var_16165_transpose_y_0, x = var_16149_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_16165_cast_fp16")]; tensor var_16175 = const()[name = string("op_16175"), val = tensor([0, 2, 1, 3])]; tensor var_16182 = const()[name = string("op_16182"), val = tensor([1, 1, -1])]; tensor var_16176 = transpose(perm = var_16175, x = var_16165_cast_fp16)[name = string("transpose_55")]; tensor attn_output_165 = reshape(shape = var_16182, x = var_16176)[name = string("attn_output_165")]; tensor var_16187 = const()[name = string("op_16187"), val = tensor([0, 2, 1])]; tensor squeeze_27_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2301166656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2302739584))))[name = string("squeeze_27_palettized")]; string var_16203_pad_type_0 = const()[name = string("op_16203_pad_type_0"), val = string("valid")]; int32 var_16203_groups_0 = const()[name = string("op_16203_groups_0"), val = int32(1)]; tensor var_16203_strides_0 = const()[name = string("op_16203_strides_0"), val = tensor([1])]; tensor var_16203_pad_0 = const()[name = string("op_16203_pad_0"), val = tensor([0, 0])]; tensor var_16203_dilations_0 = const()[name = string("op_16203_dilations_0"), val = tensor([1])]; tensor var_16188 = transpose(perm = var_16187, x = attn_output_165)[name = string("transpose_54")]; tensor var_16203 = conv(dilations = var_16203_dilations_0, groups = var_16203_groups_0, pad = var_16203_pad_0, pad_type = var_16203_pad_type_0, strides = var_16203_strides_0, weight = squeeze_27_palettized, x = var_16188)[name = string("op_16203")]; tensor var_16207 = const()[name = string("op_16207"), val = tensor([0, 2, 1])]; int32 var_16213 = const()[name = string("op_16213"), val = int32(-1)]; fp16 const_499_promoted_to_fp16 = const()[name = string("const_499_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_885 = transpose(perm = var_16207, x = var_16203)[name = string("transpose_53")]; tensor var_16219_cast_fp16 = mul(x = x_885, y = const_499_promoted_to_fp16)[name = string("op_16219_cast_fp16")]; bool input_807_interleave_0 = const()[name = string("input_807_interleave_0"), val = bool(false)]; tensor input_807_cast_fp16 = concat(axis = var_16213, interleave = input_807_interleave_0, values = (x_885, var_16219_cast_fp16))[name = string("input_807_cast_fp16")]; tensor normed_857_axes_0 = const()[name = string("normed_857_axes_0"), val = tensor([-1])]; fp16 var_16211_to_fp16 = const()[name = string("op_16211_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_857_cast_fp16 = layer_norm(axes = normed_857_axes_0, epsilon = var_16211_to_fp16, x = input_807_cast_fp16)[name = string("normed_857_cast_fp16")]; tensor var_16224_split_sizes_0 = const()[name = string("op_16224_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_16224_axis_0 = const()[name = string("op_16224_axis_0"), val = int32(-1)]; tensor var_16224_cast_fp16_0, tensor var_16224_cast_fp16_1 = split(axis = var_16224_axis_0, split_sizes = var_16224_split_sizes_0, x = normed_857_cast_fp16)[name = string("op_16224_cast_fp16")]; tensor const_500_to_fp16 = const()[name = string("const_500_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2302741184)))]; tensor var_16227_cast_fp16 = mul(x = var_16224_cast_fp16_0, y = const_500_to_fp16)[name = string("op_16227_cast_fp16")]; tensor x_889_cast_fp16 = add(x = x_877_cast_fp16, y = var_16227_cast_fp16)[name = string("x_889_cast_fp16")]; int32 var_16234 = const()[name = string("op_16234"), val = int32(-1)]; fp16 const_501_promoted_to_fp16 = const()[name = string("const_501_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_16240_cast_fp16 = mul(x = x_889_cast_fp16, y = const_501_promoted_to_fp16)[name = string("op_16240_cast_fp16")]; bool input_809_interleave_0 = const()[name = string("input_809_interleave_0"), val = bool(false)]; tensor input_809_cast_fp16 = concat(axis = var_16234, interleave = input_809_interleave_0, values = (x_889_cast_fp16, var_16240_cast_fp16))[name = string("input_809_cast_fp16")]; tensor normed_861_axes_0 = const()[name = string("normed_861_axes_0"), val = tensor([-1])]; fp16 var_16232_to_fp16 = const()[name = string("op_16232_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_861_cast_fp16 = layer_norm(axes = normed_861_axes_0, epsilon = var_16232_to_fp16, x = input_809_cast_fp16)[name = string("normed_861_cast_fp16")]; tensor var_16245_split_sizes_0 = const()[name = string("op_16245_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_16245_axis_0 = const()[name = string("op_16245_axis_0"), val = int32(-1)]; tensor var_16245_cast_fp16_0, tensor var_16245_cast_fp16_1 = split(axis = var_16245_axis_0, split_sizes = var_16245_split_sizes_0, x = normed_861_cast_fp16)[name = string("op_16245_cast_fp16")]; tensor const_502_to_fp16 = const()[name = string("const_502_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2302744320)))]; tensor var_16248_cast_fp16 = mul(x = var_16245_cast_fp16_0, y = const_502_to_fp16)[name = string("op_16248_cast_fp16")]; tensor var_16261 = const()[name = string("op_16261"), val = tensor([0, 2, 1])]; tensor input_811_axes_0 = const()[name = string("input_811_axes_0"), val = tensor([2])]; tensor var_16262 = transpose(perm = var_16261, x = var_16248_cast_fp16)[name = string("transpose_52")]; tensor input_811 = expand_dims(axes = input_811_axes_0, x = var_16262)[name = string("input_811")]; string gate_109_pad_type_0 = const()[name = string("gate_109_pad_type_0"), val = string("valid")]; tensor gate_109_strides_0 = const()[name = string("gate_109_strides_0"), val = tensor([1, 1])]; tensor gate_109_pad_0 = const()[name = string("gate_109_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_109_dilations_0 = const()[name = string("gate_109_dilations_0"), val = tensor([1, 1])]; int32 gate_109_groups_0 = const()[name = string("gate_109_groups_0"), val = int32(1)]; tensor gate_109 = conv(dilations = gate_109_dilations_0, groups = gate_109_groups_0, pad = gate_109_pad_0, pad_type = gate_109_pad_type_0, strides = gate_109_strides_0, weight = layers_27_mlp_gate_proj_weight_palettized, x = input_811)[name = string("gate_109")]; string up_55_pad_type_0 = const()[name = string("up_55_pad_type_0"), val = string("valid")]; tensor up_55_strides_0 = const()[name = string("up_55_strides_0"), val = tensor([1, 1])]; tensor up_55_pad_0 = const()[name = string("up_55_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_55_dilations_0 = const()[name = string("up_55_dilations_0"), val = tensor([1, 1])]; int32 up_55_groups_0 = const()[name = string("up_55_groups_0"), val = int32(1)]; tensor up_55 = conv(dilations = up_55_dilations_0, groups = up_55_groups_0, pad = up_55_pad_0, pad_type = up_55_pad_type_0, strides = up_55_strides_0, weight = layers_27_mlp_up_proj_weight_palettized, x = input_811)[name = string("up_55")]; string gate_111_mode_0 = const()[name = string("gate_111_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_111 = gelu(mode = gate_111_mode_0, x = gate_109)[name = string("gate_111")]; tensor input_813 = mul(x = gate_111, y = up_55)[name = string("input_813")]; string mlp_out_55_pad_type_0 = const()[name = string("mlp_out_55_pad_type_0"), val = string("valid")]; tensor mlp_out_55_strides_0 = const()[name = string("mlp_out_55_strides_0"), val = tensor([1, 1])]; tensor mlp_out_55_pad_0 = const()[name = string("mlp_out_55_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_55_dilations_0 = const()[name = string("mlp_out_55_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_55_groups_0 = const()[name = string("mlp_out_55_groups_0"), val = int32(1)]; tensor mlp_out_55 = conv(dilations = mlp_out_55_dilations_0, groups = mlp_out_55_groups_0, pad = mlp_out_55_pad_0, pad_type = mlp_out_55_pad_type_0, strides = mlp_out_55_strides_0, weight = layers_27_mlp_down_proj_weight_palettized, x = input_813)[name = string("mlp_out_55")]; tensor var_16302_axes_0 = const()[name = string("op_16302_axes_0"), val = tensor([2])]; tensor var_16302 = squeeze(axes = var_16302_axes_0, x = mlp_out_55)[name = string("op_16302")]; tensor var_16306 = const()[name = string("op_16306"), val = tensor([0, 2, 1])]; int32 var_16312 = const()[name = string("op_16312"), val = int32(-1)]; fp16 const_503_promoted_to_fp16 = const()[name = string("const_503_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_893 = transpose(perm = var_16306, x = var_16302)[name = string("transpose_51")]; tensor var_16318_cast_fp16 = mul(x = x_893, y = const_503_promoted_to_fp16)[name = string("op_16318_cast_fp16")]; bool input_815_interleave_0 = const()[name = string("input_815_interleave_0"), val = bool(false)]; tensor input_815_cast_fp16 = concat(axis = var_16312, interleave = input_815_interleave_0, values = (x_893, var_16318_cast_fp16))[name = string("input_815_cast_fp16")]; tensor normed_865_axes_0 = const()[name = string("normed_865_axes_0"), val = tensor([-1])]; fp16 var_16310_to_fp16 = const()[name = string("op_16310_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_865_cast_fp16 = layer_norm(axes = normed_865_axes_0, epsilon = var_16310_to_fp16, x = input_815_cast_fp16)[name = string("normed_865_cast_fp16")]; tensor var_16323_split_sizes_0 = const()[name = string("op_16323_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_16323_axis_0 = const()[name = string("op_16323_axis_0"), val = int32(-1)]; tensor var_16323_cast_fp16_0, tensor var_16323_cast_fp16_1 = split(axis = var_16323_axis_0, split_sizes = var_16323_split_sizes_0, x = normed_865_cast_fp16)[name = string("op_16323_cast_fp16")]; tensor const_504_to_fp16 = const()[name = string("const_504_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2302747456)))]; tensor var_16326_cast_fp16 = mul(x = var_16323_cast_fp16_0, y = const_504_to_fp16)[name = string("op_16326_cast_fp16")]; tensor hidden_states_337_cast_fp16 = add(x = x_889_cast_fp16, y = var_16326_cast_fp16)[name = string("hidden_states_337_cast_fp16")]; tensor per_layer_slice_55_begin_0 = const()[name = string("per_layer_slice_55_begin_0"), val = tensor([0, 0, 6912])]; tensor per_layer_slice_55_end_0 = const()[name = string("per_layer_slice_55_end_0"), val = tensor([1, 1, 7168])]; tensor per_layer_slice_55_end_mask_0 = const()[name = string("per_layer_slice_55_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_55 = slice_by_index(begin = per_layer_slice_55_begin_0, end = per_layer_slice_55_end_0, end_mask = per_layer_slice_55_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_55")]; tensor gated_109 = linear(bias = linear_1_bias_0, weight = layers_27_per_layer_input_gate_weight_palettized, x = hidden_states_337_cast_fp16)[name = string("linear_55")]; string gated_111_mode_0 = const()[name = string("gated_111_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_111 = gelu(mode = gated_111_mode_0, x = gated_109)[name = string("gated_111")]; tensor input_819 = mul(x = gated_111, y = per_layer_slice_55)[name = string("input_819")]; tensor x_897 = linear(bias = linear_2_bias_0, weight = layers_27_per_layer_projection_weight_palettized, x = input_819)[name = string("linear_56")]; int32 var_16363 = const()[name = string("op_16363"), val = int32(-1)]; fp16 const_505_promoted_to_fp16 = const()[name = string("const_505_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_16369_cast_fp16 = mul(x = x_897, y = const_505_promoted_to_fp16)[name = string("op_16369_cast_fp16")]; bool input_821_interleave_0 = const()[name = string("input_821_interleave_0"), val = bool(false)]; tensor input_821_cast_fp16 = concat(axis = var_16363, interleave = input_821_interleave_0, values = (x_897, var_16369_cast_fp16))[name = string("input_821_cast_fp16")]; tensor normed_869_axes_0 = const()[name = string("normed_869_axes_0"), val = tensor([-1])]; fp16 var_16361_to_fp16 = const()[name = string("op_16361_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_869_cast_fp16 = layer_norm(axes = normed_869_axes_0, epsilon = var_16361_to_fp16, x = input_821_cast_fp16)[name = string("normed_869_cast_fp16")]; tensor var_16374_split_sizes_0 = const()[name = string("op_16374_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_16374_axis_0 = const()[name = string("op_16374_axis_0"), val = int32(-1)]; tensor var_16374_cast_fp16_0, tensor var_16374_cast_fp16_1 = split(axis = var_16374_axis_0, split_sizes = var_16374_split_sizes_0, x = normed_869_cast_fp16)[name = string("op_16374_cast_fp16")]; tensor const_506_to_fp16 = const()[name = string("const_506_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2302750592)))]; tensor var_16377_cast_fp16 = mul(x = var_16374_cast_fp16_0, y = const_506_to_fp16)[name = string("op_16377_cast_fp16")]; tensor hidden_states_341_cast_fp16 = add(x = hidden_states_337_cast_fp16, y = var_16377_cast_fp16)[name = string("hidden_states_341_cast_fp16")]; tensor layers_27_layer_scalar_to_fp16 = const()[name = string("layers_27_layer_scalar_to_fp16"), val = tensor([0x1.a4p-1])]; tensor x_901_cast_fp16 = mul(x = hidden_states_341_cast_fp16, y = layers_27_layer_scalar_to_fp16)[name = string("x_901_cast_fp16")]; int32 var_16385 = const()[name = string("op_16385"), val = int32(-1)]; fp16 const_507_promoted_to_fp16 = const()[name = string("const_507_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_16391_cast_fp16 = mul(x = x_901_cast_fp16, y = const_507_promoted_to_fp16)[name = string("op_16391_cast_fp16")]; bool input_823_interleave_0 = const()[name = string("input_823_interleave_0"), val = bool(false)]; tensor input_823_cast_fp16 = concat(axis = var_16385, interleave = input_823_interleave_0, values = (x_901_cast_fp16, var_16391_cast_fp16))[name = string("input_823_cast_fp16")]; tensor normed_873_axes_0 = const()[name = string("normed_873_axes_0"), val = tensor([-1])]; fp16 var_16383_to_fp16 = const()[name = string("op_16383_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_873_cast_fp16 = layer_norm(axes = normed_873_axes_0, epsilon = var_16383_to_fp16, x = input_823_cast_fp16)[name = string("normed_873_cast_fp16")]; tensor var_16396_split_sizes_0 = const()[name = string("op_16396_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_16396_axis_0 = const()[name = string("op_16396_axis_0"), val = int32(-1)]; tensor var_16396_cast_fp16_0, tensor var_16396_cast_fp16_1 = split(axis = var_16396_axis_0, split_sizes = var_16396_split_sizes_0, x = normed_873_cast_fp16)[name = string("op_16396_cast_fp16")]; tensor const_508_to_fp16 = const()[name = string("const_508_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2302753728)))]; tensor var_16399_cast_fp16 = mul(x = var_16396_cast_fp16_0, y = const_508_to_fp16)[name = string("op_16399_cast_fp16")]; tensor var_16407 = const()[name = string("op_16407"), val = tensor([0, 2, 1])]; tensor var_16410_axes_0 = const()[name = string("op_16410_axes_0"), val = tensor([2])]; tensor var_16408_cast_fp16 = transpose(perm = var_16407, x = var_16399_cast_fp16)[name = string("transpose_50")]; tensor var_16410_cast_fp16 = expand_dims(axes = var_16410_axes_0, x = var_16408_cast_fp16)[name = string("op_16410_cast_fp16")]; string var_16426_pad_type_0 = const()[name = string("op_16426_pad_type_0"), val = string("valid")]; tensor var_16426_strides_0 = const()[name = string("op_16426_strides_0"), val = tensor([1, 1])]; tensor var_16426_pad_0 = const()[name = string("op_16426_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_16426_dilations_0 = const()[name = string("op_16426_dilations_0"), val = tensor([1, 1])]; int32 var_16426_groups_0 = const()[name = string("op_16426_groups_0"), val = int32(1)]; tensor var_16426 = conv(dilations = var_16426_dilations_0, groups = var_16426_groups_0, pad = var_16426_pad_0, pad_type = var_16426_pad_type_0, strides = var_16426_strides_0, weight = layers_28_self_attn_q_proj_weight_palettized, x = var_16410_cast_fp16)[name = string("op_16426")]; tensor var_16431 = const()[name = string("op_16431"), val = tensor([1, 8, 256, 1])]; tensor var_16432 = reshape(shape = var_16431, x = var_16426)[name = string("op_16432")]; tensor var_16437 = const()[name = string("op_16437"), val = tensor([0, 1, 3, 2])]; tensor var_16447 = const()[name = string("op_16447"), val = tensor([1, 8, 256])]; tensor var_16438 = transpose(perm = var_16437, x = var_16432)[name = string("transpose_49")]; tensor x_905 = reshape(shape = var_16447, x = var_16438)[name = string("x_905")]; int32 var_16453 = const()[name = string("op_16453"), val = int32(-1)]; fp16 const_509_promoted_to_fp16 = const()[name = string("const_509_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_16459_cast_fp16 = mul(x = x_905, y = const_509_promoted_to_fp16)[name = string("op_16459_cast_fp16")]; bool input_827_interleave_0 = const()[name = string("input_827_interleave_0"), val = bool(false)]; tensor input_827_cast_fp16 = concat(axis = var_16453, interleave = input_827_interleave_0, values = (x_905, var_16459_cast_fp16))[name = string("input_827_cast_fp16")]; tensor normed_877_axes_0 = const()[name = string("normed_877_axes_0"), val = tensor([-1])]; fp16 var_16451_to_fp16 = const()[name = string("op_16451_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_877_cast_fp16 = layer_norm(axes = normed_877_axes_0, epsilon = var_16451_to_fp16, x = input_827_cast_fp16)[name = string("normed_877_cast_fp16")]; tensor var_16464_split_sizes_0 = const()[name = string("op_16464_split_sizes_0"), val = tensor([256, 256])]; int32 var_16464_axis_0 = const()[name = string("op_16464_axis_0"), val = int32(-1)]; tensor var_16464_cast_fp16_0, tensor var_16464_cast_fp16_1 = split(axis = var_16464_axis_0, split_sizes = var_16464_split_sizes_0, x = normed_877_cast_fp16)[name = string("op_16464_cast_fp16")]; tensor var_16467_cast_fp16 = mul(x = var_16464_cast_fp16_0, y = const_307_to_fp16)[name = string("op_16467_cast_fp16")]; tensor var_16473 = const()[name = string("op_16473"), val = tensor([1, 8, 1, 256])]; tensor q_201 = reshape(shape = var_16473, x = var_16467_cast_fp16)[name = string("q_201")]; tensor var_16475 = mul(x = q_201, y = cos_1)[name = string("op_16475")]; tensor var_16476_split_sizes_0 = const()[name = string("op_16476_split_sizes_0"), val = tensor([128, 128])]; int32 var_16476_axis_0 = const()[name = string("op_16476_axis_0"), val = int32(-1)]; tensor var_16476_0, tensor var_16476_1 = split(axis = var_16476_axis_0, split_sizes = var_16476_split_sizes_0, x = q_201)[name = string("op_16476")]; fp16 const_511_promoted = const()[name = string("const_511_promoted"), val = fp16(-0x1p+0)]; tensor var_16478 = mul(x = var_16476_1, y = const_511_promoted)[name = string("op_16478")]; int32 var_16480 = const()[name = string("op_16480"), val = int32(-1)]; bool var_16481_interleave_0 = const()[name = string("op_16481_interleave_0"), val = bool(false)]; tensor var_16481 = concat(axis = var_16480, interleave = var_16481_interleave_0, values = (var_16478, var_16476_0))[name = string("op_16481")]; tensor var_16482 = mul(x = var_16481, y = sin_1)[name = string("op_16482")]; tensor q_203 = add(x = var_16475, y = var_16482)[name = string("q_203")]; bool var_16506_transpose_x_0 = const()[name = string("op_16506_transpose_x_0"), val = bool(false)]; bool var_16506_transpose_y_0 = const()[name = string("op_16506_transpose_y_0"), val = bool(false)]; tensor var_16506_cast_fp16 = matmul(transpose_x = var_16506_transpose_x_0, transpose_y = var_16506_transpose_y_0, x = q_203, y = transpose_153_cast_fp16)[name = string("op_16506_cast_fp16")]; tensor attn_weights_171_cast_fp16 = add(x = var_16506_cast_fp16, y = causal_mask)[name = string("attn_weights_171_cast_fp16")]; int32 var_16516 = const()[name = string("op_16516"), val = int32(-1)]; tensor var_16518_cast_fp16 = softmax(axis = var_16516, x = attn_weights_171_cast_fp16)[name = string("op_16518_cast_fp16")]; bool var_16534_transpose_x_0 = const()[name = string("op_16534_transpose_x_0"), val = bool(false)]; bool var_16534_transpose_y_0 = const()[name = string("op_16534_transpose_y_0"), val = bool(false)]; tensor var_16534_cast_fp16 = matmul(transpose_x = var_16534_transpose_x_0, transpose_y = var_16534_transpose_y_0, x = var_16518_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_16534_cast_fp16")]; tensor var_16544 = const()[name = string("op_16544"), val = tensor([0, 2, 1, 3])]; tensor var_16551 = const()[name = string("op_16551"), val = tensor([1, 1, -1])]; tensor var_16545 = transpose(perm = var_16544, x = var_16534_cast_fp16)[name = string("transpose_48")]; tensor attn_output_171 = reshape(shape = var_16551, x = var_16545)[name = string("attn_output_171")]; tensor var_16556 = const()[name = string("op_16556"), val = tensor([0, 2, 1])]; tensor squeeze_28_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2302756864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2304329792))))[name = string("squeeze_28_palettized")]; string var_16572_pad_type_0 = const()[name = string("op_16572_pad_type_0"), val = string("valid")]; int32 var_16572_groups_0 = const()[name = string("op_16572_groups_0"), val = int32(1)]; tensor var_16572_strides_0 = const()[name = string("op_16572_strides_0"), val = tensor([1])]; tensor var_16572_pad_0 = const()[name = string("op_16572_pad_0"), val = tensor([0, 0])]; tensor var_16572_dilations_0 = const()[name = string("op_16572_dilations_0"), val = tensor([1])]; tensor var_16557 = transpose(perm = var_16556, x = attn_output_171)[name = string("transpose_47")]; tensor var_16572 = conv(dilations = var_16572_dilations_0, groups = var_16572_groups_0, pad = var_16572_pad_0, pad_type = var_16572_pad_type_0, strides = var_16572_strides_0, weight = squeeze_28_palettized, x = var_16557)[name = string("op_16572")]; tensor var_16576 = const()[name = string("op_16576"), val = tensor([0, 2, 1])]; int32 var_16582 = const()[name = string("op_16582"), val = int32(-1)]; fp16 const_512_promoted_to_fp16 = const()[name = string("const_512_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_909 = transpose(perm = var_16576, x = var_16572)[name = string("transpose_46")]; tensor var_16588_cast_fp16 = mul(x = x_909, y = const_512_promoted_to_fp16)[name = string("op_16588_cast_fp16")]; bool input_831_interleave_0 = const()[name = string("input_831_interleave_0"), val = bool(false)]; tensor input_831_cast_fp16 = concat(axis = var_16582, interleave = input_831_interleave_0, values = (x_909, var_16588_cast_fp16))[name = string("input_831_cast_fp16")]; tensor normed_881_axes_0 = const()[name = string("normed_881_axes_0"), val = tensor([-1])]; fp16 var_16580_to_fp16 = const()[name = string("op_16580_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_881_cast_fp16 = layer_norm(axes = normed_881_axes_0, epsilon = var_16580_to_fp16, x = input_831_cast_fp16)[name = string("normed_881_cast_fp16")]; tensor var_16593_split_sizes_0 = const()[name = string("op_16593_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_16593_axis_0 = const()[name = string("op_16593_axis_0"), val = int32(-1)]; tensor var_16593_cast_fp16_0, tensor var_16593_cast_fp16_1 = split(axis = var_16593_axis_0, split_sizes = var_16593_split_sizes_0, x = normed_881_cast_fp16)[name = string("op_16593_cast_fp16")]; tensor const_513_to_fp16 = const()[name = string("const_513_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2304331392)))]; tensor var_16596_cast_fp16 = mul(x = var_16593_cast_fp16_0, y = const_513_to_fp16)[name = string("op_16596_cast_fp16")]; tensor x_913_cast_fp16 = add(x = x_901_cast_fp16, y = var_16596_cast_fp16)[name = string("x_913_cast_fp16")]; int32 var_16603 = const()[name = string("op_16603"), val = int32(-1)]; fp16 const_514_promoted_to_fp16 = const()[name = string("const_514_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_16609_cast_fp16 = mul(x = x_913_cast_fp16, y = const_514_promoted_to_fp16)[name = string("op_16609_cast_fp16")]; bool input_833_interleave_0 = const()[name = string("input_833_interleave_0"), val = bool(false)]; tensor input_833_cast_fp16 = concat(axis = var_16603, interleave = input_833_interleave_0, values = (x_913_cast_fp16, var_16609_cast_fp16))[name = string("input_833_cast_fp16")]; tensor normed_885_axes_0 = const()[name = string("normed_885_axes_0"), val = tensor([-1])]; fp16 var_16601_to_fp16 = const()[name = string("op_16601_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_885_cast_fp16 = layer_norm(axes = normed_885_axes_0, epsilon = var_16601_to_fp16, x = input_833_cast_fp16)[name = string("normed_885_cast_fp16")]; tensor var_16614_split_sizes_0 = const()[name = string("op_16614_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_16614_axis_0 = const()[name = string("op_16614_axis_0"), val = int32(-1)]; tensor var_16614_cast_fp16_0, tensor var_16614_cast_fp16_1 = split(axis = var_16614_axis_0, split_sizes = var_16614_split_sizes_0, x = normed_885_cast_fp16)[name = string("op_16614_cast_fp16")]; tensor const_515_to_fp16 = const()[name = string("const_515_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2304334528)))]; tensor var_16617_cast_fp16 = mul(x = var_16614_cast_fp16_0, y = const_515_to_fp16)[name = string("op_16617_cast_fp16")]; tensor var_16630 = const()[name = string("op_16630"), val = tensor([0, 2, 1])]; tensor input_835_axes_0 = const()[name = string("input_835_axes_0"), val = tensor([2])]; tensor var_16631 = transpose(perm = var_16630, x = var_16617_cast_fp16)[name = string("transpose_45")]; tensor input_835 = expand_dims(axes = input_835_axes_0, x = var_16631)[name = string("input_835")]; string gate_113_pad_type_0 = const()[name = string("gate_113_pad_type_0"), val = string("valid")]; tensor gate_113_strides_0 = const()[name = string("gate_113_strides_0"), val = tensor([1, 1])]; tensor gate_113_pad_0 = const()[name = string("gate_113_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_113_dilations_0 = const()[name = string("gate_113_dilations_0"), val = tensor([1, 1])]; int32 gate_113_groups_0 = const()[name = string("gate_113_groups_0"), val = int32(1)]; tensor gate_113 = conv(dilations = gate_113_dilations_0, groups = gate_113_groups_0, pad = gate_113_pad_0, pad_type = gate_113_pad_type_0, strides = gate_113_strides_0, weight = layers_28_mlp_gate_proj_weight_palettized, x = input_835)[name = string("gate_113")]; string up_57_pad_type_0 = const()[name = string("up_57_pad_type_0"), val = string("valid")]; tensor up_57_strides_0 = const()[name = string("up_57_strides_0"), val = tensor([1, 1])]; tensor up_57_pad_0 = const()[name = string("up_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_57_dilations_0 = const()[name = string("up_57_dilations_0"), val = tensor([1, 1])]; int32 up_57_groups_0 = const()[name = string("up_57_groups_0"), val = int32(1)]; tensor up_57 = conv(dilations = up_57_dilations_0, groups = up_57_groups_0, pad = up_57_pad_0, pad_type = up_57_pad_type_0, strides = up_57_strides_0, weight = layers_28_mlp_up_proj_weight_palettized, x = input_835)[name = string("up_57")]; string gate_115_mode_0 = const()[name = string("gate_115_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_115 = gelu(mode = gate_115_mode_0, x = gate_113)[name = string("gate_115")]; tensor input_837 = mul(x = gate_115, y = up_57)[name = string("input_837")]; string mlp_out_57_pad_type_0 = const()[name = string("mlp_out_57_pad_type_0"), val = string("valid")]; tensor mlp_out_57_strides_0 = const()[name = string("mlp_out_57_strides_0"), val = tensor([1, 1])]; tensor mlp_out_57_pad_0 = const()[name = string("mlp_out_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_57_dilations_0 = const()[name = string("mlp_out_57_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_57_groups_0 = const()[name = string("mlp_out_57_groups_0"), val = int32(1)]; tensor mlp_out_57 = conv(dilations = mlp_out_57_dilations_0, groups = mlp_out_57_groups_0, pad = mlp_out_57_pad_0, pad_type = mlp_out_57_pad_type_0, strides = mlp_out_57_strides_0, weight = layers_28_mlp_down_proj_weight_palettized, x = input_837)[name = string("mlp_out_57")]; tensor var_16671_axes_0 = const()[name = string("op_16671_axes_0"), val = tensor([2])]; tensor var_16671 = squeeze(axes = var_16671_axes_0, x = mlp_out_57)[name = string("op_16671")]; tensor var_16675 = const()[name = string("op_16675"), val = tensor([0, 2, 1])]; int32 var_16681 = const()[name = string("op_16681"), val = int32(-1)]; fp16 const_516_promoted_to_fp16 = const()[name = string("const_516_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_917 = transpose(perm = var_16675, x = var_16671)[name = string("transpose_44")]; tensor var_16687_cast_fp16 = mul(x = x_917, y = const_516_promoted_to_fp16)[name = string("op_16687_cast_fp16")]; bool input_839_interleave_0 = const()[name = string("input_839_interleave_0"), val = bool(false)]; tensor input_839_cast_fp16 = concat(axis = var_16681, interleave = input_839_interleave_0, values = (x_917, var_16687_cast_fp16))[name = string("input_839_cast_fp16")]; tensor normed_889_axes_0 = const()[name = string("normed_889_axes_0"), val = tensor([-1])]; fp16 var_16679_to_fp16 = const()[name = string("op_16679_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_889_cast_fp16 = layer_norm(axes = normed_889_axes_0, epsilon = var_16679_to_fp16, x = input_839_cast_fp16)[name = string("normed_889_cast_fp16")]; tensor var_16692_split_sizes_0 = const()[name = string("op_16692_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_16692_axis_0 = const()[name = string("op_16692_axis_0"), val = int32(-1)]; tensor var_16692_cast_fp16_0, tensor var_16692_cast_fp16_1 = split(axis = var_16692_axis_0, split_sizes = var_16692_split_sizes_0, x = normed_889_cast_fp16)[name = string("op_16692_cast_fp16")]; tensor const_517_to_fp16 = const()[name = string("const_517_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2304337664)))]; tensor var_16695_cast_fp16 = mul(x = var_16692_cast_fp16_0, y = const_517_to_fp16)[name = string("op_16695_cast_fp16")]; tensor hidden_states_349_cast_fp16 = add(x = x_913_cast_fp16, y = var_16695_cast_fp16)[name = string("hidden_states_349_cast_fp16")]; tensor per_layer_slice_57_begin_0 = const()[name = string("per_layer_slice_57_begin_0"), val = tensor([0, 0, 7168])]; tensor per_layer_slice_57_end_0 = const()[name = string("per_layer_slice_57_end_0"), val = tensor([1, 1, 7424])]; tensor per_layer_slice_57_end_mask_0 = const()[name = string("per_layer_slice_57_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_57 = slice_by_index(begin = per_layer_slice_57_begin_0, end = per_layer_slice_57_end_0, end_mask = per_layer_slice_57_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_57")]; tensor gated_113 = linear(bias = linear_1_bias_0, weight = layers_28_per_layer_input_gate_weight_palettized, x = hidden_states_349_cast_fp16)[name = string("linear_57")]; string gated_115_mode_0 = const()[name = string("gated_115_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_115 = gelu(mode = gated_115_mode_0, x = gated_113)[name = string("gated_115")]; tensor input_843 = mul(x = gated_115, y = per_layer_slice_57)[name = string("input_843")]; tensor x_921 = linear(bias = linear_2_bias_0, weight = layers_28_per_layer_projection_weight_palettized, x = input_843)[name = string("linear_58")]; int32 var_16732 = const()[name = string("op_16732"), val = int32(-1)]; fp16 const_518_promoted_to_fp16 = const()[name = string("const_518_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_16738_cast_fp16 = mul(x = x_921, y = const_518_promoted_to_fp16)[name = string("op_16738_cast_fp16")]; bool input_845_interleave_0 = const()[name = string("input_845_interleave_0"), val = bool(false)]; tensor input_845_cast_fp16 = concat(axis = var_16732, interleave = input_845_interleave_0, values = (x_921, var_16738_cast_fp16))[name = string("input_845_cast_fp16")]; tensor normed_893_axes_0 = const()[name = string("normed_893_axes_0"), val = tensor([-1])]; fp16 var_16730_to_fp16 = const()[name = string("op_16730_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_893_cast_fp16 = layer_norm(axes = normed_893_axes_0, epsilon = var_16730_to_fp16, x = input_845_cast_fp16)[name = string("normed_893_cast_fp16")]; tensor var_16743_split_sizes_0 = const()[name = string("op_16743_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_16743_axis_0 = const()[name = string("op_16743_axis_0"), val = int32(-1)]; tensor var_16743_cast_fp16_0, tensor var_16743_cast_fp16_1 = split(axis = var_16743_axis_0, split_sizes = var_16743_split_sizes_0, x = normed_893_cast_fp16)[name = string("op_16743_cast_fp16")]; tensor const_519_to_fp16 = const()[name = string("const_519_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2304340800)))]; tensor var_16746_cast_fp16 = mul(x = var_16743_cast_fp16_0, y = const_519_to_fp16)[name = string("op_16746_cast_fp16")]; tensor hidden_states_353_cast_fp16 = add(x = hidden_states_349_cast_fp16, y = var_16746_cast_fp16)[name = string("hidden_states_353_cast_fp16")]; tensor layers_28_layer_scalar_to_fp16 = const()[name = string("layers_28_layer_scalar_to_fp16"), val = tensor([0x1.a4p-1])]; tensor x_925_cast_fp16 = mul(x = hidden_states_353_cast_fp16, y = layers_28_layer_scalar_to_fp16)[name = string("x_925_cast_fp16")]; int32 var_16754 = const()[name = string("op_16754"), val = int32(-1)]; fp16 const_520_promoted_to_fp16 = const()[name = string("const_520_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_16760_cast_fp16 = mul(x = x_925_cast_fp16, y = const_520_promoted_to_fp16)[name = string("op_16760_cast_fp16")]; bool input_847_interleave_0 = const()[name = string("input_847_interleave_0"), val = bool(false)]; tensor input_847_cast_fp16 = concat(axis = var_16754, interleave = input_847_interleave_0, values = (x_925_cast_fp16, var_16760_cast_fp16))[name = string("input_847_cast_fp16")]; tensor normed_897_axes_0 = const()[name = string("normed_897_axes_0"), val = tensor([-1])]; fp16 var_16752_to_fp16 = const()[name = string("op_16752_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_897_cast_fp16 = layer_norm(axes = normed_897_axes_0, epsilon = var_16752_to_fp16, x = input_847_cast_fp16)[name = string("normed_897_cast_fp16")]; tensor var_16765_split_sizes_0 = const()[name = string("op_16765_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_16765_axis_0 = const()[name = string("op_16765_axis_0"), val = int32(-1)]; tensor var_16765_cast_fp16_0, tensor var_16765_cast_fp16_1 = split(axis = var_16765_axis_0, split_sizes = var_16765_split_sizes_0, x = normed_897_cast_fp16)[name = string("op_16765_cast_fp16")]; tensor const_521_to_fp16 = const()[name = string("const_521_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2304343936)))]; tensor var_16768_cast_fp16 = mul(x = var_16765_cast_fp16_0, y = const_521_to_fp16)[name = string("op_16768_cast_fp16")]; tensor var_16776 = const()[name = string("op_16776"), val = tensor([0, 2, 1])]; tensor var_16779_axes_0 = const()[name = string("op_16779_axes_0"), val = tensor([2])]; tensor var_16777_cast_fp16 = transpose(perm = var_16776, x = var_16768_cast_fp16)[name = string("transpose_43")]; tensor var_16779_cast_fp16 = expand_dims(axes = var_16779_axes_0, x = var_16777_cast_fp16)[name = string("op_16779_cast_fp16")]; string var_16795_pad_type_0 = const()[name = string("op_16795_pad_type_0"), val = string("valid")]; tensor var_16795_strides_0 = const()[name = string("op_16795_strides_0"), val = tensor([1, 1])]; tensor var_16795_pad_0 = const()[name = string("op_16795_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_16795_dilations_0 = const()[name = string("op_16795_dilations_0"), val = tensor([1, 1])]; int32 var_16795_groups_0 = const()[name = string("op_16795_groups_0"), val = int32(1)]; tensor var_16795 = conv(dilations = var_16795_dilations_0, groups = var_16795_groups_0, pad = var_16795_pad_0, pad_type = var_16795_pad_type_0, strides = var_16795_strides_0, weight = layers_29_self_attn_q_proj_weight_palettized, x = var_16779_cast_fp16)[name = string("op_16795")]; tensor var_16800 = const()[name = string("op_16800"), val = tensor([1, 8, 512, 1])]; tensor var_16801 = reshape(shape = var_16800, x = var_16795)[name = string("op_16801")]; tensor var_16806 = const()[name = string("op_16806"), val = tensor([0, 1, 3, 2])]; tensor var_16816 = const()[name = string("op_16816"), val = tensor([1, 8, 512])]; tensor var_16807 = transpose(perm = var_16806, x = var_16801)[name = string("transpose_42")]; tensor x_929 = reshape(shape = var_16816, x = var_16807)[name = string("x_929")]; int32 var_16822 = const()[name = string("op_16822"), val = int32(-1)]; fp16 const_522_promoted_to_fp16 = const()[name = string("const_522_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_16828_cast_fp16 = mul(x = x_929, y = const_522_promoted_to_fp16)[name = string("op_16828_cast_fp16")]; bool input_851_interleave_0 = const()[name = string("input_851_interleave_0"), val = bool(false)]; tensor input_851_cast_fp16 = concat(axis = var_16822, interleave = input_851_interleave_0, values = (x_929, var_16828_cast_fp16))[name = string("input_851_cast_fp16")]; tensor normed_901_axes_0 = const()[name = string("normed_901_axes_0"), val = tensor([-1])]; fp16 var_16820_to_fp16 = const()[name = string("op_16820_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_901_cast_fp16 = layer_norm(axes = normed_901_axes_0, epsilon = var_16820_to_fp16, x = input_851_cast_fp16)[name = string("normed_901_cast_fp16")]; tensor var_16833_split_sizes_0 = const()[name = string("op_16833_split_sizes_0"), val = tensor([512, 512])]; int32 var_16833_axis_0 = const()[name = string("op_16833_axis_0"), val = int32(-1)]; tensor var_16833_cast_fp16_0, tensor var_16833_cast_fp16_1 = split(axis = var_16833_axis_0, split_sizes = var_16833_split_sizes_0, x = normed_901_cast_fp16)[name = string("op_16833_cast_fp16")]; tensor var_16836_cast_fp16 = mul(x = var_16833_cast_fp16_0, y = const_325_to_fp16)[name = string("op_16836_cast_fp16")]; tensor var_16842 = const()[name = string("op_16842"), val = tensor([1, 8, 1, 512])]; tensor q_207 = reshape(shape = var_16842, x = var_16836_cast_fp16)[name = string("q_207")]; tensor var_16844 = mul(x = q_207, y = cos)[name = string("op_16844")]; tensor var_16845_split_sizes_0 = const()[name = string("op_16845_split_sizes_0"), val = tensor([256, 256])]; int32 var_16845_axis_0 = const()[name = string("op_16845_axis_0"), val = int32(-1)]; tensor var_16845_0, tensor var_16845_1 = split(axis = var_16845_axis_0, split_sizes = var_16845_split_sizes_0, x = q_207)[name = string("op_16845")]; fp16 const_524_promoted = const()[name = string("const_524_promoted"), val = fp16(-0x1p+0)]; tensor var_16847 = mul(x = var_16845_1, y = const_524_promoted)[name = string("op_16847")]; int32 var_16849 = const()[name = string("op_16849"), val = int32(-1)]; bool var_16850_interleave_0 = const()[name = string("op_16850_interleave_0"), val = bool(false)]; tensor var_16850 = concat(axis = var_16849, interleave = var_16850_interleave_0, values = (var_16847, var_16845_0))[name = string("op_16850")]; tensor var_16851 = mul(x = var_16850, y = sin)[name = string("op_16851")]; tensor q_209 = add(x = var_16844, y = var_16851)[name = string("q_209")]; bool var_16875_transpose_x_0 = const()[name = string("op_16875_transpose_x_0"), val = bool(false)]; bool var_16875_transpose_y_0 = const()[name = string("op_16875_transpose_y_0"), val = bool(false)]; tensor var_16875_cast_fp16 = matmul(transpose_x = var_16875_transpose_x_0, transpose_y = var_16875_transpose_y_0, x = q_209, y = transpose_154_cast_fp16)[name = string("op_16875_cast_fp16")]; tensor attn_weights_177_cast_fp16 = add(x = var_16875_cast_fp16, y = causal_mask)[name = string("attn_weights_177_cast_fp16")]; int32 var_16885 = const()[name = string("op_16885"), val = int32(-1)]; tensor var_16887_cast_fp16 = softmax(axis = var_16885, x = attn_weights_177_cast_fp16)[name = string("op_16887_cast_fp16")]; bool var_16903_transpose_x_0 = const()[name = string("op_16903_transpose_x_0"), val = bool(false)]; bool var_16903_transpose_y_0 = const()[name = string("op_16903_transpose_y_0"), val = bool(false)]; tensor var_16903_cast_fp16 = matmul(transpose_x = var_16903_transpose_x_0, transpose_y = var_16903_transpose_y_0, x = var_16887_cast_fp16, y = V_expanded_29_cast_fp16)[name = string("op_16903_cast_fp16")]; tensor var_16913 = const()[name = string("op_16913"), val = tensor([0, 2, 1, 3])]; tensor var_16920 = const()[name = string("op_16920"), val = tensor([1, 1, -1])]; tensor var_16914 = transpose(perm = var_16913, x = var_16903_cast_fp16)[name = string("transpose_41")]; tensor attn_output_177 = reshape(shape = var_16920, x = var_16914)[name = string("attn_output_177")]; tensor var_16925 = const()[name = string("op_16925"), val = tensor([0, 2, 1])]; tensor squeeze_29_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2304347072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2307492864))))[name = string("squeeze_29_palettized")]; string var_16941_pad_type_0 = const()[name = string("op_16941_pad_type_0"), val = string("valid")]; int32 var_16941_groups_0 = const()[name = string("op_16941_groups_0"), val = int32(1)]; tensor var_16941_strides_0 = const()[name = string("op_16941_strides_0"), val = tensor([1])]; tensor var_16941_pad_0 = const()[name = string("op_16941_pad_0"), val = tensor([0, 0])]; tensor var_16941_dilations_0 = const()[name = string("op_16941_dilations_0"), val = tensor([1])]; tensor var_16926 = transpose(perm = var_16925, x = attn_output_177)[name = string("transpose_40")]; tensor var_16941 = conv(dilations = var_16941_dilations_0, groups = var_16941_groups_0, pad = var_16941_pad_0, pad_type = var_16941_pad_type_0, strides = var_16941_strides_0, weight = squeeze_29_palettized, x = var_16926)[name = string("op_16941")]; tensor var_16945 = const()[name = string("op_16945"), val = tensor([0, 2, 1])]; int32 var_16951 = const()[name = string("op_16951"), val = int32(-1)]; fp16 const_525_promoted_to_fp16 = const()[name = string("const_525_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_933 = transpose(perm = var_16945, x = var_16941)[name = string("transpose_39")]; tensor var_16957_cast_fp16 = mul(x = x_933, y = const_525_promoted_to_fp16)[name = string("op_16957_cast_fp16")]; bool input_855_interleave_0 = const()[name = string("input_855_interleave_0"), val = bool(false)]; tensor input_855_cast_fp16 = concat(axis = var_16951, interleave = input_855_interleave_0, values = (x_933, var_16957_cast_fp16))[name = string("input_855_cast_fp16")]; tensor normed_905_axes_0 = const()[name = string("normed_905_axes_0"), val = tensor([-1])]; fp16 var_16949_to_fp16 = const()[name = string("op_16949_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_905_cast_fp16 = layer_norm(axes = normed_905_axes_0, epsilon = var_16949_to_fp16, x = input_855_cast_fp16)[name = string("normed_905_cast_fp16")]; tensor var_16962_split_sizes_0 = const()[name = string("op_16962_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_16962_axis_0 = const()[name = string("op_16962_axis_0"), val = int32(-1)]; tensor var_16962_cast_fp16_0, tensor var_16962_cast_fp16_1 = split(axis = var_16962_axis_0, split_sizes = var_16962_split_sizes_0, x = normed_905_cast_fp16)[name = string("op_16962_cast_fp16")]; tensor const_526_to_fp16 = const()[name = string("const_526_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2307494464)))]; tensor var_16965_cast_fp16 = mul(x = var_16962_cast_fp16_0, y = const_526_to_fp16)[name = string("op_16965_cast_fp16")]; tensor x_937_cast_fp16 = add(x = x_925_cast_fp16, y = var_16965_cast_fp16)[name = string("x_937_cast_fp16")]; int32 var_16972 = const()[name = string("op_16972"), val = int32(-1)]; fp16 const_527_promoted_to_fp16 = const()[name = string("const_527_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_16978_cast_fp16 = mul(x = x_937_cast_fp16, y = const_527_promoted_to_fp16)[name = string("op_16978_cast_fp16")]; bool input_857_interleave_0 = const()[name = string("input_857_interleave_0"), val = bool(false)]; tensor input_857_cast_fp16 = concat(axis = var_16972, interleave = input_857_interleave_0, values = (x_937_cast_fp16, var_16978_cast_fp16))[name = string("input_857_cast_fp16")]; tensor normed_909_axes_0 = const()[name = string("normed_909_axes_0"), val = tensor([-1])]; fp16 var_16970_to_fp16 = const()[name = string("op_16970_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_909_cast_fp16 = layer_norm(axes = normed_909_axes_0, epsilon = var_16970_to_fp16, x = input_857_cast_fp16)[name = string("normed_909_cast_fp16")]; tensor var_16983_split_sizes_0 = const()[name = string("op_16983_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_16983_axis_0 = const()[name = string("op_16983_axis_0"), val = int32(-1)]; tensor var_16983_cast_fp16_0, tensor var_16983_cast_fp16_1 = split(axis = var_16983_axis_0, split_sizes = var_16983_split_sizes_0, x = normed_909_cast_fp16)[name = string("op_16983_cast_fp16")]; tensor const_528_to_fp16 = const()[name = string("const_528_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2307497600)))]; tensor var_16986_cast_fp16 = mul(x = var_16983_cast_fp16_0, y = const_528_to_fp16)[name = string("op_16986_cast_fp16")]; tensor var_16999 = const()[name = string("op_16999"), val = tensor([0, 2, 1])]; tensor input_859_axes_0 = const()[name = string("input_859_axes_0"), val = tensor([2])]; tensor var_17000 = transpose(perm = var_16999, x = var_16986_cast_fp16)[name = string("transpose_38")]; tensor input_859 = expand_dims(axes = input_859_axes_0, x = var_17000)[name = string("input_859")]; string gate_117_pad_type_0 = const()[name = string("gate_117_pad_type_0"), val = string("valid")]; tensor gate_117_strides_0 = const()[name = string("gate_117_strides_0"), val = tensor([1, 1])]; tensor gate_117_pad_0 = const()[name = string("gate_117_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_117_dilations_0 = const()[name = string("gate_117_dilations_0"), val = tensor([1, 1])]; int32 gate_117_groups_0 = const()[name = string("gate_117_groups_0"), val = int32(1)]; tensor gate_117 = conv(dilations = gate_117_dilations_0, groups = gate_117_groups_0, pad = gate_117_pad_0, pad_type = gate_117_pad_type_0, strides = gate_117_strides_0, weight = layers_29_mlp_gate_proj_weight_palettized, x = input_859)[name = string("gate_117")]; string up_59_pad_type_0 = const()[name = string("up_59_pad_type_0"), val = string("valid")]; tensor up_59_strides_0 = const()[name = string("up_59_strides_0"), val = tensor([1, 1])]; tensor up_59_pad_0 = const()[name = string("up_59_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_59_dilations_0 = const()[name = string("up_59_dilations_0"), val = tensor([1, 1])]; int32 up_59_groups_0 = const()[name = string("up_59_groups_0"), val = int32(1)]; tensor up_59 = conv(dilations = up_59_dilations_0, groups = up_59_groups_0, pad = up_59_pad_0, pad_type = up_59_pad_type_0, strides = up_59_strides_0, weight = layers_29_mlp_up_proj_weight_palettized, x = input_859)[name = string("up_59")]; string gate_119_mode_0 = const()[name = string("gate_119_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_119 = gelu(mode = gate_119_mode_0, x = gate_117)[name = string("gate_119")]; tensor input_861 = mul(x = gate_119, y = up_59)[name = string("input_861")]; string mlp_out_59_pad_type_0 = const()[name = string("mlp_out_59_pad_type_0"), val = string("valid")]; tensor mlp_out_59_strides_0 = const()[name = string("mlp_out_59_strides_0"), val = tensor([1, 1])]; tensor mlp_out_59_pad_0 = const()[name = string("mlp_out_59_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_59_dilations_0 = const()[name = string("mlp_out_59_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_59_groups_0 = const()[name = string("mlp_out_59_groups_0"), val = int32(1)]; tensor mlp_out_59 = conv(dilations = mlp_out_59_dilations_0, groups = mlp_out_59_groups_0, pad = mlp_out_59_pad_0, pad_type = mlp_out_59_pad_type_0, strides = mlp_out_59_strides_0, weight = layers_29_mlp_down_proj_weight_palettized, x = input_861)[name = string("mlp_out_59")]; tensor var_17040_axes_0 = const()[name = string("op_17040_axes_0"), val = tensor([2])]; tensor var_17040 = squeeze(axes = var_17040_axes_0, x = mlp_out_59)[name = string("op_17040")]; tensor var_17044 = const()[name = string("op_17044"), val = tensor([0, 2, 1])]; int32 var_17050 = const()[name = string("op_17050"), val = int32(-1)]; fp16 const_529_promoted_to_fp16 = const()[name = string("const_529_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_941 = transpose(perm = var_17044, x = var_17040)[name = string("transpose_37")]; tensor var_17056_cast_fp16 = mul(x = x_941, y = const_529_promoted_to_fp16)[name = string("op_17056_cast_fp16")]; bool input_863_interleave_0 = const()[name = string("input_863_interleave_0"), val = bool(false)]; tensor input_863_cast_fp16 = concat(axis = var_17050, interleave = input_863_interleave_0, values = (x_941, var_17056_cast_fp16))[name = string("input_863_cast_fp16")]; tensor normed_913_axes_0 = const()[name = string("normed_913_axes_0"), val = tensor([-1])]; fp16 var_17048_to_fp16 = const()[name = string("op_17048_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_913_cast_fp16 = layer_norm(axes = normed_913_axes_0, epsilon = var_17048_to_fp16, x = input_863_cast_fp16)[name = string("normed_913_cast_fp16")]; tensor var_17061_split_sizes_0 = const()[name = string("op_17061_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_17061_axis_0 = const()[name = string("op_17061_axis_0"), val = int32(-1)]; tensor var_17061_cast_fp16_0, tensor var_17061_cast_fp16_1 = split(axis = var_17061_axis_0, split_sizes = var_17061_split_sizes_0, x = normed_913_cast_fp16)[name = string("op_17061_cast_fp16")]; tensor const_530_to_fp16 = const()[name = string("const_530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2307500736)))]; tensor var_17064_cast_fp16 = mul(x = var_17061_cast_fp16_0, y = const_530_to_fp16)[name = string("op_17064_cast_fp16")]; tensor hidden_states_361_cast_fp16 = add(x = x_937_cast_fp16, y = var_17064_cast_fp16)[name = string("hidden_states_361_cast_fp16")]; tensor per_layer_slice_59_begin_0 = const()[name = string("per_layer_slice_59_begin_0"), val = tensor([0, 0, 7424])]; tensor per_layer_slice_59_end_0 = const()[name = string("per_layer_slice_59_end_0"), val = tensor([1, 1, 7680])]; tensor per_layer_slice_59_end_mask_0 = const()[name = string("per_layer_slice_59_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_59 = slice_by_index(begin = per_layer_slice_59_begin_0, end = per_layer_slice_59_end_0, end_mask = per_layer_slice_59_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_59")]; tensor gated_117 = linear(bias = linear_1_bias_0, weight = layers_29_per_layer_input_gate_weight_palettized, x = hidden_states_361_cast_fp16)[name = string("linear_59")]; string gated_119_mode_0 = const()[name = string("gated_119_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_119 = gelu(mode = gated_119_mode_0, x = gated_117)[name = string("gated_119")]; tensor input_867 = mul(x = gated_119, y = per_layer_slice_59)[name = string("input_867")]; tensor x_945 = linear(bias = linear_2_bias_0, weight = layers_29_per_layer_projection_weight_palettized, x = input_867)[name = string("linear_60")]; int32 var_17101 = const()[name = string("op_17101"), val = int32(-1)]; fp16 const_531_promoted_to_fp16 = const()[name = string("const_531_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_17107_cast_fp16 = mul(x = x_945, y = const_531_promoted_to_fp16)[name = string("op_17107_cast_fp16")]; bool input_869_interleave_0 = const()[name = string("input_869_interleave_0"), val = bool(false)]; tensor input_869_cast_fp16 = concat(axis = var_17101, interleave = input_869_interleave_0, values = (x_945, var_17107_cast_fp16))[name = string("input_869_cast_fp16")]; tensor normed_917_axes_0 = const()[name = string("normed_917_axes_0"), val = tensor([-1])]; fp16 var_17099_to_fp16 = const()[name = string("op_17099_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_917_cast_fp16 = layer_norm(axes = normed_917_axes_0, epsilon = var_17099_to_fp16, x = input_869_cast_fp16)[name = string("normed_917_cast_fp16")]; tensor var_17112_split_sizes_0 = const()[name = string("op_17112_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_17112_axis_0 = const()[name = string("op_17112_axis_0"), val = int32(-1)]; tensor var_17112_cast_fp16_0, tensor var_17112_cast_fp16_1 = split(axis = var_17112_axis_0, split_sizes = var_17112_split_sizes_0, x = normed_917_cast_fp16)[name = string("op_17112_cast_fp16")]; tensor const_532_to_fp16 = const()[name = string("const_532_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2307503872)))]; tensor var_17115_cast_fp16 = mul(x = var_17112_cast_fp16_0, y = const_532_to_fp16)[name = string("op_17115_cast_fp16")]; tensor hidden_states_365_cast_fp16 = add(x = hidden_states_361_cast_fp16, y = var_17115_cast_fp16)[name = string("hidden_states_365_cast_fp16")]; tensor layers_29_layer_scalar_to_fp16 = const()[name = string("layers_29_layer_scalar_to_fp16"), val = tensor([0x1.ap-1])]; tensor x_949_cast_fp16 = mul(x = hidden_states_365_cast_fp16, y = layers_29_layer_scalar_to_fp16)[name = string("x_949_cast_fp16")]; int32 var_17123 = const()[name = string("op_17123"), val = int32(-1)]; fp16 const_533_promoted_to_fp16 = const()[name = string("const_533_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_17129_cast_fp16 = mul(x = x_949_cast_fp16, y = const_533_promoted_to_fp16)[name = string("op_17129_cast_fp16")]; bool input_871_interleave_0 = const()[name = string("input_871_interleave_0"), val = bool(false)]; tensor input_871_cast_fp16 = concat(axis = var_17123, interleave = input_871_interleave_0, values = (x_949_cast_fp16, var_17129_cast_fp16))[name = string("input_871_cast_fp16")]; tensor normed_921_axes_0 = const()[name = string("normed_921_axes_0"), val = tensor([-1])]; fp16 var_17121_to_fp16 = const()[name = string("op_17121_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_921_cast_fp16 = layer_norm(axes = normed_921_axes_0, epsilon = var_17121_to_fp16, x = input_871_cast_fp16)[name = string("normed_921_cast_fp16")]; tensor var_17134_split_sizes_0 = const()[name = string("op_17134_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_17134_axis_0 = const()[name = string("op_17134_axis_0"), val = int32(-1)]; tensor var_17134_cast_fp16_0, tensor var_17134_cast_fp16_1 = split(axis = var_17134_axis_0, split_sizes = var_17134_split_sizes_0, x = normed_921_cast_fp16)[name = string("op_17134_cast_fp16")]; tensor const_534_to_fp16 = const()[name = string("const_534_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2307507008)))]; tensor var_17137_cast_fp16 = mul(x = var_17134_cast_fp16_0, y = const_534_to_fp16)[name = string("op_17137_cast_fp16")]; tensor var_17145 = const()[name = string("op_17145"), val = tensor([0, 2, 1])]; tensor var_17148_axes_0 = const()[name = string("op_17148_axes_0"), val = tensor([2])]; tensor var_17146_cast_fp16 = transpose(perm = var_17145, x = var_17137_cast_fp16)[name = string("transpose_36")]; tensor var_17148_cast_fp16 = expand_dims(axes = var_17148_axes_0, x = var_17146_cast_fp16)[name = string("op_17148_cast_fp16")]; string var_17164_pad_type_0 = const()[name = string("op_17164_pad_type_0"), val = string("valid")]; tensor var_17164_strides_0 = const()[name = string("op_17164_strides_0"), val = tensor([1, 1])]; tensor var_17164_pad_0 = const()[name = string("op_17164_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_17164_dilations_0 = const()[name = string("op_17164_dilations_0"), val = tensor([1, 1])]; int32 var_17164_groups_0 = const()[name = string("op_17164_groups_0"), val = int32(1)]; tensor var_17164 = conv(dilations = var_17164_dilations_0, groups = var_17164_groups_0, pad = var_17164_pad_0, pad_type = var_17164_pad_type_0, strides = var_17164_strides_0, weight = layers_30_self_attn_q_proj_weight_palettized, x = var_17148_cast_fp16)[name = string("op_17164")]; tensor var_17169 = const()[name = string("op_17169"), val = tensor([1, 8, 256, 1])]; tensor var_17170 = reshape(shape = var_17169, x = var_17164)[name = string("op_17170")]; tensor var_17175 = const()[name = string("op_17175"), val = tensor([0, 1, 3, 2])]; tensor var_17185 = const()[name = string("op_17185"), val = tensor([1, 8, 256])]; tensor var_17176 = transpose(perm = var_17175, x = var_17170)[name = string("transpose_35")]; tensor x_953 = reshape(shape = var_17185, x = var_17176)[name = string("x_953")]; int32 var_17191 = const()[name = string("op_17191"), val = int32(-1)]; fp16 const_535_promoted_to_fp16 = const()[name = string("const_535_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_17197_cast_fp16 = mul(x = x_953, y = const_535_promoted_to_fp16)[name = string("op_17197_cast_fp16")]; bool input_875_interleave_0 = const()[name = string("input_875_interleave_0"), val = bool(false)]; tensor input_875_cast_fp16 = concat(axis = var_17191, interleave = input_875_interleave_0, values = (x_953, var_17197_cast_fp16))[name = string("input_875_cast_fp16")]; tensor normed_925_axes_0 = const()[name = string("normed_925_axes_0"), val = tensor([-1])]; fp16 var_17189_to_fp16 = const()[name = string("op_17189_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_925_cast_fp16 = layer_norm(axes = normed_925_axes_0, epsilon = var_17189_to_fp16, x = input_875_cast_fp16)[name = string("normed_925_cast_fp16")]; tensor var_17202_split_sizes_0 = const()[name = string("op_17202_split_sizes_0"), val = tensor([256, 256])]; int32 var_17202_axis_0 = const()[name = string("op_17202_axis_0"), val = int32(-1)]; tensor var_17202_cast_fp16_0, tensor var_17202_cast_fp16_1 = split(axis = var_17202_axis_0, split_sizes = var_17202_split_sizes_0, x = normed_925_cast_fp16)[name = string("op_17202_cast_fp16")]; tensor var_17205_cast_fp16 = mul(x = var_17202_cast_fp16_0, y = const_307_to_fp16)[name = string("op_17205_cast_fp16")]; tensor var_17211 = const()[name = string("op_17211"), val = tensor([1, 8, 1, 256])]; tensor q_213 = reshape(shape = var_17211, x = var_17205_cast_fp16)[name = string("q_213")]; tensor var_17213 = mul(x = q_213, y = cos_1)[name = string("op_17213")]; tensor var_17214_split_sizes_0 = const()[name = string("op_17214_split_sizes_0"), val = tensor([128, 128])]; int32 var_17214_axis_0 = const()[name = string("op_17214_axis_0"), val = int32(-1)]; tensor var_17214_0, tensor var_17214_1 = split(axis = var_17214_axis_0, split_sizes = var_17214_split_sizes_0, x = q_213)[name = string("op_17214")]; fp16 const_537_promoted = const()[name = string("const_537_promoted"), val = fp16(-0x1p+0)]; tensor var_17216 = mul(x = var_17214_1, y = const_537_promoted)[name = string("op_17216")]; int32 var_17218 = const()[name = string("op_17218"), val = int32(-1)]; bool var_17219_interleave_0 = const()[name = string("op_17219_interleave_0"), val = bool(false)]; tensor var_17219 = concat(axis = var_17218, interleave = var_17219_interleave_0, values = (var_17216, var_17214_0))[name = string("op_17219")]; tensor var_17220 = mul(x = var_17219, y = sin_1)[name = string("op_17220")]; tensor q_215 = add(x = var_17213, y = var_17220)[name = string("q_215")]; bool var_17244_transpose_x_0 = const()[name = string("op_17244_transpose_x_0"), val = bool(false)]; bool var_17244_transpose_y_0 = const()[name = string("op_17244_transpose_y_0"), val = bool(false)]; tensor var_17244_cast_fp16 = matmul(transpose_x = var_17244_transpose_x_0, transpose_y = var_17244_transpose_y_0, x = q_215, y = transpose_153_cast_fp16)[name = string("op_17244_cast_fp16")]; tensor attn_weights_183_cast_fp16 = add(x = var_17244_cast_fp16, y = causal_mask)[name = string("attn_weights_183_cast_fp16")]; int32 var_17254 = const()[name = string("op_17254"), val = int32(-1)]; tensor var_17256_cast_fp16 = softmax(axis = var_17254, x = attn_weights_183_cast_fp16)[name = string("op_17256_cast_fp16")]; bool var_17272_transpose_x_0 = const()[name = string("op_17272_transpose_x_0"), val = bool(false)]; bool var_17272_transpose_y_0 = const()[name = string("op_17272_transpose_y_0"), val = bool(false)]; tensor var_17272_cast_fp16 = matmul(transpose_x = var_17272_transpose_x_0, transpose_y = var_17272_transpose_y_0, x = var_17256_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_17272_cast_fp16")]; tensor var_17282 = const()[name = string("op_17282"), val = tensor([0, 2, 1, 3])]; tensor var_17289 = const()[name = string("op_17289"), val = tensor([1, 1, -1])]; tensor var_17283 = transpose(perm = var_17282, x = var_17272_cast_fp16)[name = string("transpose_34")]; tensor attn_output_183 = reshape(shape = var_17289, x = var_17283)[name = string("attn_output_183")]; tensor var_17294 = const()[name = string("op_17294"), val = tensor([0, 2, 1])]; tensor squeeze_30_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2307510144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2309083072))))[name = string("squeeze_30_palettized")]; string var_17310_pad_type_0 = const()[name = string("op_17310_pad_type_0"), val = string("valid")]; int32 var_17310_groups_0 = const()[name = string("op_17310_groups_0"), val = int32(1)]; tensor var_17310_strides_0 = const()[name = string("op_17310_strides_0"), val = tensor([1])]; tensor var_17310_pad_0 = const()[name = string("op_17310_pad_0"), val = tensor([0, 0])]; tensor var_17310_dilations_0 = const()[name = string("op_17310_dilations_0"), val = tensor([1])]; tensor var_17295 = transpose(perm = var_17294, x = attn_output_183)[name = string("transpose_33")]; tensor var_17310 = conv(dilations = var_17310_dilations_0, groups = var_17310_groups_0, pad = var_17310_pad_0, pad_type = var_17310_pad_type_0, strides = var_17310_strides_0, weight = squeeze_30_palettized, x = var_17295)[name = string("op_17310")]; tensor var_17314 = const()[name = string("op_17314"), val = tensor([0, 2, 1])]; int32 var_17320 = const()[name = string("op_17320"), val = int32(-1)]; fp16 const_538_promoted_to_fp16 = const()[name = string("const_538_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_957 = transpose(perm = var_17314, x = var_17310)[name = string("transpose_32")]; tensor var_17326_cast_fp16 = mul(x = x_957, y = const_538_promoted_to_fp16)[name = string("op_17326_cast_fp16")]; bool input_879_interleave_0 = const()[name = string("input_879_interleave_0"), val = bool(false)]; tensor input_879_cast_fp16 = concat(axis = var_17320, interleave = input_879_interleave_0, values = (x_957, var_17326_cast_fp16))[name = string("input_879_cast_fp16")]; tensor normed_929_axes_0 = const()[name = string("normed_929_axes_0"), val = tensor([-1])]; fp16 var_17318_to_fp16 = const()[name = string("op_17318_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_929_cast_fp16 = layer_norm(axes = normed_929_axes_0, epsilon = var_17318_to_fp16, x = input_879_cast_fp16)[name = string("normed_929_cast_fp16")]; tensor var_17331_split_sizes_0 = const()[name = string("op_17331_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_17331_axis_0 = const()[name = string("op_17331_axis_0"), val = int32(-1)]; tensor var_17331_cast_fp16_0, tensor var_17331_cast_fp16_1 = split(axis = var_17331_axis_0, split_sizes = var_17331_split_sizes_0, x = normed_929_cast_fp16)[name = string("op_17331_cast_fp16")]; tensor const_539_to_fp16 = const()[name = string("const_539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2309084672)))]; tensor var_17334_cast_fp16 = mul(x = var_17331_cast_fp16_0, y = const_539_to_fp16)[name = string("op_17334_cast_fp16")]; tensor x_961_cast_fp16 = add(x = x_949_cast_fp16, y = var_17334_cast_fp16)[name = string("x_961_cast_fp16")]; int32 var_17341 = const()[name = string("op_17341"), val = int32(-1)]; fp16 const_540_promoted_to_fp16 = const()[name = string("const_540_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_17347_cast_fp16 = mul(x = x_961_cast_fp16, y = const_540_promoted_to_fp16)[name = string("op_17347_cast_fp16")]; bool input_881_interleave_0 = const()[name = string("input_881_interleave_0"), val = bool(false)]; tensor input_881_cast_fp16 = concat(axis = var_17341, interleave = input_881_interleave_0, values = (x_961_cast_fp16, var_17347_cast_fp16))[name = string("input_881_cast_fp16")]; tensor normed_933_axes_0 = const()[name = string("normed_933_axes_0"), val = tensor([-1])]; fp16 var_17339_to_fp16 = const()[name = string("op_17339_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_933_cast_fp16 = layer_norm(axes = normed_933_axes_0, epsilon = var_17339_to_fp16, x = input_881_cast_fp16)[name = string("normed_933_cast_fp16")]; tensor var_17352_split_sizes_0 = const()[name = string("op_17352_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_17352_axis_0 = const()[name = string("op_17352_axis_0"), val = int32(-1)]; tensor var_17352_cast_fp16_0, tensor var_17352_cast_fp16_1 = split(axis = var_17352_axis_0, split_sizes = var_17352_split_sizes_0, x = normed_933_cast_fp16)[name = string("op_17352_cast_fp16")]; tensor const_541_to_fp16 = const()[name = string("const_541_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2309087808)))]; tensor var_17355_cast_fp16 = mul(x = var_17352_cast_fp16_0, y = const_541_to_fp16)[name = string("op_17355_cast_fp16")]; tensor var_17368 = const()[name = string("op_17368"), val = tensor([0, 2, 1])]; tensor input_883_axes_0 = const()[name = string("input_883_axes_0"), val = tensor([2])]; tensor var_17369 = transpose(perm = var_17368, x = var_17355_cast_fp16)[name = string("transpose_31")]; tensor input_883 = expand_dims(axes = input_883_axes_0, x = var_17369)[name = string("input_883")]; string gate_121_pad_type_0 = const()[name = string("gate_121_pad_type_0"), val = string("valid")]; tensor gate_121_strides_0 = const()[name = string("gate_121_strides_0"), val = tensor([1, 1])]; tensor gate_121_pad_0 = const()[name = string("gate_121_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_121_dilations_0 = const()[name = string("gate_121_dilations_0"), val = tensor([1, 1])]; int32 gate_121_groups_0 = const()[name = string("gate_121_groups_0"), val = int32(1)]; tensor gate_121 = conv(dilations = gate_121_dilations_0, groups = gate_121_groups_0, pad = gate_121_pad_0, pad_type = gate_121_pad_type_0, strides = gate_121_strides_0, weight = layers_30_mlp_gate_proj_weight_palettized, x = input_883)[name = string("gate_121")]; string up_61_pad_type_0 = const()[name = string("up_61_pad_type_0"), val = string("valid")]; tensor up_61_strides_0 = const()[name = string("up_61_strides_0"), val = tensor([1, 1])]; tensor up_61_pad_0 = const()[name = string("up_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_61_dilations_0 = const()[name = string("up_61_dilations_0"), val = tensor([1, 1])]; int32 up_61_groups_0 = const()[name = string("up_61_groups_0"), val = int32(1)]; tensor up_61 = conv(dilations = up_61_dilations_0, groups = up_61_groups_0, pad = up_61_pad_0, pad_type = up_61_pad_type_0, strides = up_61_strides_0, weight = layers_30_mlp_up_proj_weight_palettized, x = input_883)[name = string("up_61")]; string gate_123_mode_0 = const()[name = string("gate_123_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_123 = gelu(mode = gate_123_mode_0, x = gate_121)[name = string("gate_123")]; tensor input_885 = mul(x = gate_123, y = up_61)[name = string("input_885")]; string mlp_out_61_pad_type_0 = const()[name = string("mlp_out_61_pad_type_0"), val = string("valid")]; tensor mlp_out_61_strides_0 = const()[name = string("mlp_out_61_strides_0"), val = tensor([1, 1])]; tensor mlp_out_61_pad_0 = const()[name = string("mlp_out_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_61_dilations_0 = const()[name = string("mlp_out_61_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_61_groups_0 = const()[name = string("mlp_out_61_groups_0"), val = int32(1)]; tensor mlp_out_61 = conv(dilations = mlp_out_61_dilations_0, groups = mlp_out_61_groups_0, pad = mlp_out_61_pad_0, pad_type = mlp_out_61_pad_type_0, strides = mlp_out_61_strides_0, weight = layers_30_mlp_down_proj_weight_palettized, x = input_885)[name = string("mlp_out_61")]; tensor var_17409_axes_0 = const()[name = string("op_17409_axes_0"), val = tensor([2])]; tensor var_17409 = squeeze(axes = var_17409_axes_0, x = mlp_out_61)[name = string("op_17409")]; tensor var_17413 = const()[name = string("op_17413"), val = tensor([0, 2, 1])]; int32 var_17419 = const()[name = string("op_17419"), val = int32(-1)]; fp16 const_542_promoted_to_fp16 = const()[name = string("const_542_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_965 = transpose(perm = var_17413, x = var_17409)[name = string("transpose_30")]; tensor var_17425_cast_fp16 = mul(x = x_965, y = const_542_promoted_to_fp16)[name = string("op_17425_cast_fp16")]; bool input_887_interleave_0 = const()[name = string("input_887_interleave_0"), val = bool(false)]; tensor input_887_cast_fp16 = concat(axis = var_17419, interleave = input_887_interleave_0, values = (x_965, var_17425_cast_fp16))[name = string("input_887_cast_fp16")]; tensor normed_937_axes_0 = const()[name = string("normed_937_axes_0"), val = tensor([-1])]; fp16 var_17417_to_fp16 = const()[name = string("op_17417_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_937_cast_fp16 = layer_norm(axes = normed_937_axes_0, epsilon = var_17417_to_fp16, x = input_887_cast_fp16)[name = string("normed_937_cast_fp16")]; tensor var_17430_split_sizes_0 = const()[name = string("op_17430_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_17430_axis_0 = const()[name = string("op_17430_axis_0"), val = int32(-1)]; tensor var_17430_cast_fp16_0, tensor var_17430_cast_fp16_1 = split(axis = var_17430_axis_0, split_sizes = var_17430_split_sizes_0, x = normed_937_cast_fp16)[name = string("op_17430_cast_fp16")]; tensor const_543_to_fp16 = const()[name = string("const_543_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2309090944)))]; tensor var_17433_cast_fp16 = mul(x = var_17430_cast_fp16_0, y = const_543_to_fp16)[name = string("op_17433_cast_fp16")]; tensor hidden_states_373_cast_fp16 = add(x = x_961_cast_fp16, y = var_17433_cast_fp16)[name = string("hidden_states_373_cast_fp16")]; tensor per_layer_slice_61_begin_0 = const()[name = string("per_layer_slice_61_begin_0"), val = tensor([0, 0, 7680])]; tensor per_layer_slice_61_end_0 = const()[name = string("per_layer_slice_61_end_0"), val = tensor([1, 1, 7936])]; tensor per_layer_slice_61_end_mask_0 = const()[name = string("per_layer_slice_61_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_61 = slice_by_index(begin = per_layer_slice_61_begin_0, end = per_layer_slice_61_end_0, end_mask = per_layer_slice_61_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_61")]; tensor gated_121 = linear(bias = linear_1_bias_0, weight = layers_30_per_layer_input_gate_weight_palettized, x = hidden_states_373_cast_fp16)[name = string("linear_61")]; string gated_123_mode_0 = const()[name = string("gated_123_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_123 = gelu(mode = gated_123_mode_0, x = gated_121)[name = string("gated_123")]; tensor input_891 = mul(x = gated_123, y = per_layer_slice_61)[name = string("input_891")]; tensor x_969 = linear(bias = linear_2_bias_0, weight = layers_30_per_layer_projection_weight_palettized, x = input_891)[name = string("linear_62")]; int32 var_17470 = const()[name = string("op_17470"), val = int32(-1)]; fp16 const_544_promoted_to_fp16 = const()[name = string("const_544_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_17476_cast_fp16 = mul(x = x_969, y = const_544_promoted_to_fp16)[name = string("op_17476_cast_fp16")]; bool input_893_interleave_0 = const()[name = string("input_893_interleave_0"), val = bool(false)]; tensor input_893_cast_fp16 = concat(axis = var_17470, interleave = input_893_interleave_0, values = (x_969, var_17476_cast_fp16))[name = string("input_893_cast_fp16")]; tensor normed_941_axes_0 = const()[name = string("normed_941_axes_0"), val = tensor([-1])]; fp16 var_17468_to_fp16 = const()[name = string("op_17468_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_941_cast_fp16 = layer_norm(axes = normed_941_axes_0, epsilon = var_17468_to_fp16, x = input_893_cast_fp16)[name = string("normed_941_cast_fp16")]; tensor var_17481_split_sizes_0 = const()[name = string("op_17481_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_17481_axis_0 = const()[name = string("op_17481_axis_0"), val = int32(-1)]; tensor var_17481_cast_fp16_0, tensor var_17481_cast_fp16_1 = split(axis = var_17481_axis_0, split_sizes = var_17481_split_sizes_0, x = normed_941_cast_fp16)[name = string("op_17481_cast_fp16")]; tensor const_545_to_fp16 = const()[name = string("const_545_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2309094080)))]; tensor var_17484_cast_fp16 = mul(x = var_17481_cast_fp16_0, y = const_545_to_fp16)[name = string("op_17484_cast_fp16")]; tensor hidden_states_377_cast_fp16 = add(x = hidden_states_373_cast_fp16, y = var_17484_cast_fp16)[name = string("hidden_states_377_cast_fp16")]; tensor layers_30_layer_scalar_to_fp16 = const()[name = string("layers_30_layer_scalar_to_fp16"), val = tensor([0x1.bep-1])]; tensor x_973_cast_fp16 = mul(x = hidden_states_377_cast_fp16, y = layers_30_layer_scalar_to_fp16)[name = string("x_973_cast_fp16")]; int32 var_17492 = const()[name = string("op_17492"), val = int32(-1)]; fp16 const_546_promoted_to_fp16 = const()[name = string("const_546_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_17498_cast_fp16 = mul(x = x_973_cast_fp16, y = const_546_promoted_to_fp16)[name = string("op_17498_cast_fp16")]; bool input_895_interleave_0 = const()[name = string("input_895_interleave_0"), val = bool(false)]; tensor input_895_cast_fp16 = concat(axis = var_17492, interleave = input_895_interleave_0, values = (x_973_cast_fp16, var_17498_cast_fp16))[name = string("input_895_cast_fp16")]; tensor normed_945_axes_0 = const()[name = string("normed_945_axes_0"), val = tensor([-1])]; fp16 var_17490_to_fp16 = const()[name = string("op_17490_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_945_cast_fp16 = layer_norm(axes = normed_945_axes_0, epsilon = var_17490_to_fp16, x = input_895_cast_fp16)[name = string("normed_945_cast_fp16")]; tensor var_17503_split_sizes_0 = const()[name = string("op_17503_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_17503_axis_0 = const()[name = string("op_17503_axis_0"), val = int32(-1)]; tensor var_17503_cast_fp16_0, tensor var_17503_cast_fp16_1 = split(axis = var_17503_axis_0, split_sizes = var_17503_split_sizes_0, x = normed_945_cast_fp16)[name = string("op_17503_cast_fp16")]; tensor const_547_to_fp16 = const()[name = string("const_547_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2309097216)))]; tensor var_17506_cast_fp16 = mul(x = var_17503_cast_fp16_0, y = const_547_to_fp16)[name = string("op_17506_cast_fp16")]; tensor var_17514 = const()[name = string("op_17514"), val = tensor([0, 2, 1])]; tensor var_17517_axes_0 = const()[name = string("op_17517_axes_0"), val = tensor([2])]; tensor var_17515_cast_fp16 = transpose(perm = var_17514, x = var_17506_cast_fp16)[name = string("transpose_29")]; tensor var_17517_cast_fp16 = expand_dims(axes = var_17517_axes_0, x = var_17515_cast_fp16)[name = string("op_17517_cast_fp16")]; string var_17533_pad_type_0 = const()[name = string("op_17533_pad_type_0"), val = string("valid")]; tensor var_17533_strides_0 = const()[name = string("op_17533_strides_0"), val = tensor([1, 1])]; tensor var_17533_pad_0 = const()[name = string("op_17533_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_17533_dilations_0 = const()[name = string("op_17533_dilations_0"), val = tensor([1, 1])]; int32 var_17533_groups_0 = const()[name = string("op_17533_groups_0"), val = int32(1)]; tensor var_17533 = conv(dilations = var_17533_dilations_0, groups = var_17533_groups_0, pad = var_17533_pad_0, pad_type = var_17533_pad_type_0, strides = var_17533_strides_0, weight = layers_31_self_attn_q_proj_weight_palettized, x = var_17517_cast_fp16)[name = string("op_17533")]; tensor var_17538 = const()[name = string("op_17538"), val = tensor([1, 8, 256, 1])]; tensor var_17539 = reshape(shape = var_17538, x = var_17533)[name = string("op_17539")]; tensor var_17544 = const()[name = string("op_17544"), val = tensor([0, 1, 3, 2])]; tensor var_17554 = const()[name = string("op_17554"), val = tensor([1, 8, 256])]; tensor var_17545 = transpose(perm = var_17544, x = var_17539)[name = string("transpose_28")]; tensor x_977 = reshape(shape = var_17554, x = var_17545)[name = string("x_977")]; int32 var_17560 = const()[name = string("op_17560"), val = int32(-1)]; fp16 const_548_promoted_to_fp16 = const()[name = string("const_548_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_17566_cast_fp16 = mul(x = x_977, y = const_548_promoted_to_fp16)[name = string("op_17566_cast_fp16")]; bool input_899_interleave_0 = const()[name = string("input_899_interleave_0"), val = bool(false)]; tensor input_899_cast_fp16 = concat(axis = var_17560, interleave = input_899_interleave_0, values = (x_977, var_17566_cast_fp16))[name = string("input_899_cast_fp16")]; tensor normed_949_axes_0 = const()[name = string("normed_949_axes_0"), val = tensor([-1])]; fp16 var_17558_to_fp16 = const()[name = string("op_17558_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_949_cast_fp16 = layer_norm(axes = normed_949_axes_0, epsilon = var_17558_to_fp16, x = input_899_cast_fp16)[name = string("normed_949_cast_fp16")]; tensor var_17571_split_sizes_0 = const()[name = string("op_17571_split_sizes_0"), val = tensor([256, 256])]; int32 var_17571_axis_0 = const()[name = string("op_17571_axis_0"), val = int32(-1)]; tensor var_17571_cast_fp16_0, tensor var_17571_cast_fp16_1 = split(axis = var_17571_axis_0, split_sizes = var_17571_split_sizes_0, x = normed_949_cast_fp16)[name = string("op_17571_cast_fp16")]; tensor var_17574_cast_fp16 = mul(x = var_17571_cast_fp16_0, y = const_307_to_fp16)[name = string("op_17574_cast_fp16")]; tensor var_17580 = const()[name = string("op_17580"), val = tensor([1, 8, 1, 256])]; tensor q_219 = reshape(shape = var_17580, x = var_17574_cast_fp16)[name = string("q_219")]; tensor var_17582 = mul(x = q_219, y = cos_1)[name = string("op_17582")]; tensor var_17583_split_sizes_0 = const()[name = string("op_17583_split_sizes_0"), val = tensor([128, 128])]; int32 var_17583_axis_0 = const()[name = string("op_17583_axis_0"), val = int32(-1)]; tensor var_17583_0, tensor var_17583_1 = split(axis = var_17583_axis_0, split_sizes = var_17583_split_sizes_0, x = q_219)[name = string("op_17583")]; fp16 const_550_promoted = const()[name = string("const_550_promoted"), val = fp16(-0x1p+0)]; tensor var_17585 = mul(x = var_17583_1, y = const_550_promoted)[name = string("op_17585")]; int32 var_17587 = const()[name = string("op_17587"), val = int32(-1)]; bool var_17588_interleave_0 = const()[name = string("op_17588_interleave_0"), val = bool(false)]; tensor var_17588 = concat(axis = var_17587, interleave = var_17588_interleave_0, values = (var_17585, var_17583_0))[name = string("op_17588")]; tensor var_17589 = mul(x = var_17588, y = sin_1)[name = string("op_17589")]; tensor q_221 = add(x = var_17582, y = var_17589)[name = string("q_221")]; bool var_17613_transpose_x_0 = const()[name = string("op_17613_transpose_x_0"), val = bool(false)]; bool var_17613_transpose_y_0 = const()[name = string("op_17613_transpose_y_0"), val = bool(false)]; tensor var_17613_cast_fp16 = matmul(transpose_x = var_17613_transpose_x_0, transpose_y = var_17613_transpose_y_0, x = q_221, y = transpose_153_cast_fp16)[name = string("op_17613_cast_fp16")]; tensor attn_weights_189_cast_fp16 = add(x = var_17613_cast_fp16, y = causal_mask)[name = string("attn_weights_189_cast_fp16")]; int32 var_17623 = const()[name = string("op_17623"), val = int32(-1)]; tensor var_17625_cast_fp16 = softmax(axis = var_17623, x = attn_weights_189_cast_fp16)[name = string("op_17625_cast_fp16")]; bool var_17641_transpose_x_0 = const()[name = string("op_17641_transpose_x_0"), val = bool(false)]; bool var_17641_transpose_y_0 = const()[name = string("op_17641_transpose_y_0"), val = bool(false)]; tensor var_17641_cast_fp16 = matmul(transpose_x = var_17641_transpose_x_0, transpose_y = var_17641_transpose_y_0, x = var_17625_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_17641_cast_fp16")]; tensor var_17651 = const()[name = string("op_17651"), val = tensor([0, 2, 1, 3])]; tensor var_17658 = const()[name = string("op_17658"), val = tensor([1, 1, -1])]; tensor var_17652 = transpose(perm = var_17651, x = var_17641_cast_fp16)[name = string("transpose_27")]; tensor attn_output_189 = reshape(shape = var_17658, x = var_17652)[name = string("attn_output_189")]; tensor var_17663 = const()[name = string("op_17663"), val = tensor([0, 2, 1])]; tensor squeeze_31_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2309100352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2310673280))))[name = string("squeeze_31_palettized")]; string var_17679_pad_type_0 = const()[name = string("op_17679_pad_type_0"), val = string("valid")]; int32 var_17679_groups_0 = const()[name = string("op_17679_groups_0"), val = int32(1)]; tensor var_17679_strides_0 = const()[name = string("op_17679_strides_0"), val = tensor([1])]; tensor var_17679_pad_0 = const()[name = string("op_17679_pad_0"), val = tensor([0, 0])]; tensor var_17679_dilations_0 = const()[name = string("op_17679_dilations_0"), val = tensor([1])]; tensor var_17664 = transpose(perm = var_17663, x = attn_output_189)[name = string("transpose_26")]; tensor var_17679 = conv(dilations = var_17679_dilations_0, groups = var_17679_groups_0, pad = var_17679_pad_0, pad_type = var_17679_pad_type_0, strides = var_17679_strides_0, weight = squeeze_31_palettized, x = var_17664)[name = string("op_17679")]; tensor var_17683 = const()[name = string("op_17683"), val = tensor([0, 2, 1])]; int32 var_17689 = const()[name = string("op_17689"), val = int32(-1)]; fp16 const_551_promoted_to_fp16 = const()[name = string("const_551_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_981 = transpose(perm = var_17683, x = var_17679)[name = string("transpose_25")]; tensor var_17695_cast_fp16 = mul(x = x_981, y = const_551_promoted_to_fp16)[name = string("op_17695_cast_fp16")]; bool input_903_interleave_0 = const()[name = string("input_903_interleave_0"), val = bool(false)]; tensor input_903_cast_fp16 = concat(axis = var_17689, interleave = input_903_interleave_0, values = (x_981, var_17695_cast_fp16))[name = string("input_903_cast_fp16")]; tensor normed_953_axes_0 = const()[name = string("normed_953_axes_0"), val = tensor([-1])]; fp16 var_17687_to_fp16 = const()[name = string("op_17687_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_953_cast_fp16 = layer_norm(axes = normed_953_axes_0, epsilon = var_17687_to_fp16, x = input_903_cast_fp16)[name = string("normed_953_cast_fp16")]; tensor var_17700_split_sizes_0 = const()[name = string("op_17700_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_17700_axis_0 = const()[name = string("op_17700_axis_0"), val = int32(-1)]; tensor var_17700_cast_fp16_0, tensor var_17700_cast_fp16_1 = split(axis = var_17700_axis_0, split_sizes = var_17700_split_sizes_0, x = normed_953_cast_fp16)[name = string("op_17700_cast_fp16")]; tensor const_552_to_fp16 = const()[name = string("const_552_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2310674880)))]; tensor var_17703_cast_fp16 = mul(x = var_17700_cast_fp16_0, y = const_552_to_fp16)[name = string("op_17703_cast_fp16")]; tensor x_985_cast_fp16 = add(x = x_973_cast_fp16, y = var_17703_cast_fp16)[name = string("x_985_cast_fp16")]; int32 var_17710 = const()[name = string("op_17710"), val = int32(-1)]; fp16 const_553_promoted_to_fp16 = const()[name = string("const_553_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_17716_cast_fp16 = mul(x = x_985_cast_fp16, y = const_553_promoted_to_fp16)[name = string("op_17716_cast_fp16")]; bool input_905_interleave_0 = const()[name = string("input_905_interleave_0"), val = bool(false)]; tensor input_905_cast_fp16 = concat(axis = var_17710, interleave = input_905_interleave_0, values = (x_985_cast_fp16, var_17716_cast_fp16))[name = string("input_905_cast_fp16")]; tensor normed_957_axes_0 = const()[name = string("normed_957_axes_0"), val = tensor([-1])]; fp16 var_17708_to_fp16 = const()[name = string("op_17708_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_957_cast_fp16 = layer_norm(axes = normed_957_axes_0, epsilon = var_17708_to_fp16, x = input_905_cast_fp16)[name = string("normed_957_cast_fp16")]; tensor var_17721_split_sizes_0 = const()[name = string("op_17721_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_17721_axis_0 = const()[name = string("op_17721_axis_0"), val = int32(-1)]; tensor var_17721_cast_fp16_0, tensor var_17721_cast_fp16_1 = split(axis = var_17721_axis_0, split_sizes = var_17721_split_sizes_0, x = normed_957_cast_fp16)[name = string("op_17721_cast_fp16")]; tensor const_554_to_fp16 = const()[name = string("const_554_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2310678016)))]; tensor var_17724_cast_fp16 = mul(x = var_17721_cast_fp16_0, y = const_554_to_fp16)[name = string("op_17724_cast_fp16")]; tensor var_17737 = const()[name = string("op_17737"), val = tensor([0, 2, 1])]; tensor input_907_axes_0 = const()[name = string("input_907_axes_0"), val = tensor([2])]; tensor var_17738 = transpose(perm = var_17737, x = var_17724_cast_fp16)[name = string("transpose_24")]; tensor input_907 = expand_dims(axes = input_907_axes_0, x = var_17738)[name = string("input_907")]; string gate_125_pad_type_0 = const()[name = string("gate_125_pad_type_0"), val = string("valid")]; tensor gate_125_strides_0 = const()[name = string("gate_125_strides_0"), val = tensor([1, 1])]; tensor gate_125_pad_0 = const()[name = string("gate_125_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_125_dilations_0 = const()[name = string("gate_125_dilations_0"), val = tensor([1, 1])]; int32 gate_125_groups_0 = const()[name = string("gate_125_groups_0"), val = int32(1)]; tensor gate_125 = conv(dilations = gate_125_dilations_0, groups = gate_125_groups_0, pad = gate_125_pad_0, pad_type = gate_125_pad_type_0, strides = gate_125_strides_0, weight = layers_31_mlp_gate_proj_weight_palettized, x = input_907)[name = string("gate_125")]; string up_63_pad_type_0 = const()[name = string("up_63_pad_type_0"), val = string("valid")]; tensor up_63_strides_0 = const()[name = string("up_63_strides_0"), val = tensor([1, 1])]; tensor up_63_pad_0 = const()[name = string("up_63_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_63_dilations_0 = const()[name = string("up_63_dilations_0"), val = tensor([1, 1])]; int32 up_63_groups_0 = const()[name = string("up_63_groups_0"), val = int32(1)]; tensor up_63 = conv(dilations = up_63_dilations_0, groups = up_63_groups_0, pad = up_63_pad_0, pad_type = up_63_pad_type_0, strides = up_63_strides_0, weight = layers_31_mlp_up_proj_weight_palettized, x = input_907)[name = string("up_63")]; string gate_127_mode_0 = const()[name = string("gate_127_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_127 = gelu(mode = gate_127_mode_0, x = gate_125)[name = string("gate_127")]; tensor input_909 = mul(x = gate_127, y = up_63)[name = string("input_909")]; string mlp_out_63_pad_type_0 = const()[name = string("mlp_out_63_pad_type_0"), val = string("valid")]; tensor mlp_out_63_strides_0 = const()[name = string("mlp_out_63_strides_0"), val = tensor([1, 1])]; tensor mlp_out_63_pad_0 = const()[name = string("mlp_out_63_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_63_dilations_0 = const()[name = string("mlp_out_63_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_63_groups_0 = const()[name = string("mlp_out_63_groups_0"), val = int32(1)]; tensor mlp_out_63 = conv(dilations = mlp_out_63_dilations_0, groups = mlp_out_63_groups_0, pad = mlp_out_63_pad_0, pad_type = mlp_out_63_pad_type_0, strides = mlp_out_63_strides_0, weight = layers_31_mlp_down_proj_weight_palettized, x = input_909)[name = string("mlp_out_63")]; tensor var_17778_axes_0 = const()[name = string("op_17778_axes_0"), val = tensor([2])]; tensor var_17778 = squeeze(axes = var_17778_axes_0, x = mlp_out_63)[name = string("op_17778")]; tensor var_17782 = const()[name = string("op_17782"), val = tensor([0, 2, 1])]; int32 var_17788 = const()[name = string("op_17788"), val = int32(-1)]; fp16 const_555_promoted_to_fp16 = const()[name = string("const_555_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_989 = transpose(perm = var_17782, x = var_17778)[name = string("transpose_23")]; tensor var_17794_cast_fp16 = mul(x = x_989, y = const_555_promoted_to_fp16)[name = string("op_17794_cast_fp16")]; bool input_911_interleave_0 = const()[name = string("input_911_interleave_0"), val = bool(false)]; tensor input_911_cast_fp16 = concat(axis = var_17788, interleave = input_911_interleave_0, values = (x_989, var_17794_cast_fp16))[name = string("input_911_cast_fp16")]; tensor normed_961_axes_0 = const()[name = string("normed_961_axes_0"), val = tensor([-1])]; fp16 var_17786_to_fp16 = const()[name = string("op_17786_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_961_cast_fp16 = layer_norm(axes = normed_961_axes_0, epsilon = var_17786_to_fp16, x = input_911_cast_fp16)[name = string("normed_961_cast_fp16")]; tensor var_17799_split_sizes_0 = const()[name = string("op_17799_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_17799_axis_0 = const()[name = string("op_17799_axis_0"), val = int32(-1)]; tensor var_17799_cast_fp16_0, tensor var_17799_cast_fp16_1 = split(axis = var_17799_axis_0, split_sizes = var_17799_split_sizes_0, x = normed_961_cast_fp16)[name = string("op_17799_cast_fp16")]; tensor const_556_to_fp16 = const()[name = string("const_556_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2310681152)))]; tensor var_17802_cast_fp16 = mul(x = var_17799_cast_fp16_0, y = const_556_to_fp16)[name = string("op_17802_cast_fp16")]; tensor hidden_states_385_cast_fp16 = add(x = x_985_cast_fp16, y = var_17802_cast_fp16)[name = string("hidden_states_385_cast_fp16")]; tensor per_layer_slice_63_begin_0 = const()[name = string("per_layer_slice_63_begin_0"), val = tensor([0, 0, 7936])]; tensor per_layer_slice_63_end_0 = const()[name = string("per_layer_slice_63_end_0"), val = tensor([1, 1, 8192])]; tensor per_layer_slice_63_end_mask_0 = const()[name = string("per_layer_slice_63_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_63 = slice_by_index(begin = per_layer_slice_63_begin_0, end = per_layer_slice_63_end_0, end_mask = per_layer_slice_63_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_63")]; tensor gated_125 = linear(bias = linear_1_bias_0, weight = layers_31_per_layer_input_gate_weight_palettized, x = hidden_states_385_cast_fp16)[name = string("linear_63")]; string gated_127_mode_0 = const()[name = string("gated_127_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_127 = gelu(mode = gated_127_mode_0, x = gated_125)[name = string("gated_127")]; tensor input_915 = mul(x = gated_127, y = per_layer_slice_63)[name = string("input_915")]; tensor x_993 = linear(bias = linear_2_bias_0, weight = layers_31_per_layer_projection_weight_palettized, x = input_915)[name = string("linear_64")]; int32 var_17839 = const()[name = string("op_17839"), val = int32(-1)]; fp16 const_557_promoted_to_fp16 = const()[name = string("const_557_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_17845_cast_fp16 = mul(x = x_993, y = const_557_promoted_to_fp16)[name = string("op_17845_cast_fp16")]; bool input_917_interleave_0 = const()[name = string("input_917_interleave_0"), val = bool(false)]; tensor input_917_cast_fp16 = concat(axis = var_17839, interleave = input_917_interleave_0, values = (x_993, var_17845_cast_fp16))[name = string("input_917_cast_fp16")]; tensor normed_965_axes_0 = const()[name = string("normed_965_axes_0"), val = tensor([-1])]; fp16 var_17837_to_fp16 = const()[name = string("op_17837_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_965_cast_fp16 = layer_norm(axes = normed_965_axes_0, epsilon = var_17837_to_fp16, x = input_917_cast_fp16)[name = string("normed_965_cast_fp16")]; tensor var_17850_split_sizes_0 = const()[name = string("op_17850_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_17850_axis_0 = const()[name = string("op_17850_axis_0"), val = int32(-1)]; tensor var_17850_cast_fp16_0, tensor var_17850_cast_fp16_1 = split(axis = var_17850_axis_0, split_sizes = var_17850_split_sizes_0, x = normed_965_cast_fp16)[name = string("op_17850_cast_fp16")]; tensor const_558_to_fp16 = const()[name = string("const_558_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2310684288)))]; tensor var_17853_cast_fp16 = mul(x = var_17850_cast_fp16_0, y = const_558_to_fp16)[name = string("op_17853_cast_fp16")]; tensor hidden_states_389_cast_fp16 = add(x = hidden_states_385_cast_fp16, y = var_17853_cast_fp16)[name = string("hidden_states_389_cast_fp16")]; tensor layers_31_layer_scalar_to_fp16 = const()[name = string("layers_31_layer_scalar_to_fp16"), val = tensor([0x1.a8p-1])]; tensor x_997_cast_fp16 = mul(x = hidden_states_389_cast_fp16, y = layers_31_layer_scalar_to_fp16)[name = string("x_997_cast_fp16")]; int32 var_17861 = const()[name = string("op_17861"), val = int32(-1)]; fp16 const_559_promoted_to_fp16 = const()[name = string("const_559_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_17867_cast_fp16 = mul(x = x_997_cast_fp16, y = const_559_promoted_to_fp16)[name = string("op_17867_cast_fp16")]; bool input_919_interleave_0 = const()[name = string("input_919_interleave_0"), val = bool(false)]; tensor input_919_cast_fp16 = concat(axis = var_17861, interleave = input_919_interleave_0, values = (x_997_cast_fp16, var_17867_cast_fp16))[name = string("input_919_cast_fp16")]; tensor normed_969_axes_0 = const()[name = string("normed_969_axes_0"), val = tensor([-1])]; fp16 var_17859_to_fp16 = const()[name = string("op_17859_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_969_cast_fp16 = layer_norm(axes = normed_969_axes_0, epsilon = var_17859_to_fp16, x = input_919_cast_fp16)[name = string("normed_969_cast_fp16")]; tensor var_17872_split_sizes_0 = const()[name = string("op_17872_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_17872_axis_0 = const()[name = string("op_17872_axis_0"), val = int32(-1)]; tensor var_17872_cast_fp16_0, tensor var_17872_cast_fp16_1 = split(axis = var_17872_axis_0, split_sizes = var_17872_split_sizes_0, x = normed_969_cast_fp16)[name = string("op_17872_cast_fp16")]; tensor const_560_to_fp16 = const()[name = string("const_560_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2310687424)))]; tensor var_17875_cast_fp16 = mul(x = var_17872_cast_fp16_0, y = const_560_to_fp16)[name = string("op_17875_cast_fp16")]; tensor var_17883 = const()[name = string("op_17883"), val = tensor([0, 2, 1])]; tensor var_17886_axes_0 = const()[name = string("op_17886_axes_0"), val = tensor([2])]; tensor var_17884_cast_fp16 = transpose(perm = var_17883, x = var_17875_cast_fp16)[name = string("transpose_22")]; tensor var_17886_cast_fp16 = expand_dims(axes = var_17886_axes_0, x = var_17884_cast_fp16)[name = string("op_17886_cast_fp16")]; string var_17902_pad_type_0 = const()[name = string("op_17902_pad_type_0"), val = string("valid")]; tensor var_17902_strides_0 = const()[name = string("op_17902_strides_0"), val = tensor([1, 1])]; tensor var_17902_pad_0 = const()[name = string("op_17902_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_17902_dilations_0 = const()[name = string("op_17902_dilations_0"), val = tensor([1, 1])]; int32 var_17902_groups_0 = const()[name = string("op_17902_groups_0"), val = int32(1)]; tensor var_17902 = conv(dilations = var_17902_dilations_0, groups = var_17902_groups_0, pad = var_17902_pad_0, pad_type = var_17902_pad_type_0, strides = var_17902_strides_0, weight = layers_32_self_attn_q_proj_weight_palettized, x = var_17886_cast_fp16)[name = string("op_17902")]; tensor var_17907 = const()[name = string("op_17907"), val = tensor([1, 8, 256, 1])]; tensor var_17908 = reshape(shape = var_17907, x = var_17902)[name = string("op_17908")]; tensor var_17913 = const()[name = string("op_17913"), val = tensor([0, 1, 3, 2])]; tensor var_17923 = const()[name = string("op_17923"), val = tensor([1, 8, 256])]; tensor var_17914 = transpose(perm = var_17913, x = var_17908)[name = string("transpose_21")]; tensor x_1001 = reshape(shape = var_17923, x = var_17914)[name = string("x_1001")]; int32 var_17929 = const()[name = string("op_17929"), val = int32(-1)]; fp16 const_561_promoted_to_fp16 = const()[name = string("const_561_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_17935_cast_fp16 = mul(x = x_1001, y = const_561_promoted_to_fp16)[name = string("op_17935_cast_fp16")]; bool input_923_interleave_0 = const()[name = string("input_923_interleave_0"), val = bool(false)]; tensor input_923_cast_fp16 = concat(axis = var_17929, interleave = input_923_interleave_0, values = (x_1001, var_17935_cast_fp16))[name = string("input_923_cast_fp16")]; tensor normed_973_axes_0 = const()[name = string("normed_973_axes_0"), val = tensor([-1])]; fp16 var_17927_to_fp16 = const()[name = string("op_17927_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_973_cast_fp16 = layer_norm(axes = normed_973_axes_0, epsilon = var_17927_to_fp16, x = input_923_cast_fp16)[name = string("normed_973_cast_fp16")]; tensor var_17940_split_sizes_0 = const()[name = string("op_17940_split_sizes_0"), val = tensor([256, 256])]; int32 var_17940_axis_0 = const()[name = string("op_17940_axis_0"), val = int32(-1)]; tensor var_17940_cast_fp16_0, tensor var_17940_cast_fp16_1 = split(axis = var_17940_axis_0, split_sizes = var_17940_split_sizes_0, x = normed_973_cast_fp16)[name = string("op_17940_cast_fp16")]; tensor var_17943_cast_fp16 = mul(x = var_17940_cast_fp16_0, y = const_307_to_fp16)[name = string("op_17943_cast_fp16")]; tensor var_17949 = const()[name = string("op_17949"), val = tensor([1, 8, 1, 256])]; tensor q_225 = reshape(shape = var_17949, x = var_17943_cast_fp16)[name = string("q_225")]; tensor var_17951 = mul(x = q_225, y = cos_1)[name = string("op_17951")]; tensor var_17952_split_sizes_0 = const()[name = string("op_17952_split_sizes_0"), val = tensor([128, 128])]; int32 var_17952_axis_0 = const()[name = string("op_17952_axis_0"), val = int32(-1)]; tensor var_17952_0, tensor var_17952_1 = split(axis = var_17952_axis_0, split_sizes = var_17952_split_sizes_0, x = q_225)[name = string("op_17952")]; fp16 const_563_promoted = const()[name = string("const_563_promoted"), val = fp16(-0x1p+0)]; tensor var_17954 = mul(x = var_17952_1, y = const_563_promoted)[name = string("op_17954")]; int32 var_17956 = const()[name = string("op_17956"), val = int32(-1)]; bool var_17957_interleave_0 = const()[name = string("op_17957_interleave_0"), val = bool(false)]; tensor var_17957 = concat(axis = var_17956, interleave = var_17957_interleave_0, values = (var_17954, var_17952_0))[name = string("op_17957")]; tensor var_17958 = mul(x = var_17957, y = sin_1)[name = string("op_17958")]; tensor q_227 = add(x = var_17951, y = var_17958)[name = string("q_227")]; bool var_17982_transpose_x_0 = const()[name = string("op_17982_transpose_x_0"), val = bool(false)]; bool var_17982_transpose_y_0 = const()[name = string("op_17982_transpose_y_0"), val = bool(false)]; tensor var_17982_cast_fp16 = matmul(transpose_x = var_17982_transpose_x_0, transpose_y = var_17982_transpose_y_0, x = q_227, y = transpose_153_cast_fp16)[name = string("op_17982_cast_fp16")]; tensor attn_weights_195_cast_fp16 = add(x = var_17982_cast_fp16, y = causal_mask)[name = string("attn_weights_195_cast_fp16")]; int32 var_17992 = const()[name = string("op_17992"), val = int32(-1)]; tensor var_17994_cast_fp16 = softmax(axis = var_17992, x = attn_weights_195_cast_fp16)[name = string("op_17994_cast_fp16")]; bool var_18010_transpose_x_0 = const()[name = string("op_18010_transpose_x_0"), val = bool(false)]; bool var_18010_transpose_y_0 = const()[name = string("op_18010_transpose_y_0"), val = bool(false)]; tensor var_18010_cast_fp16 = matmul(transpose_x = var_18010_transpose_x_0, transpose_y = var_18010_transpose_y_0, x = var_17994_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_18010_cast_fp16")]; tensor var_18020 = const()[name = string("op_18020"), val = tensor([0, 2, 1, 3])]; tensor var_18027 = const()[name = string("op_18027"), val = tensor([1, 1, -1])]; tensor var_18021 = transpose(perm = var_18020, x = var_18010_cast_fp16)[name = string("transpose_20")]; tensor attn_output_195 = reshape(shape = var_18027, x = var_18021)[name = string("attn_output_195")]; tensor var_18032 = const()[name = string("op_18032"), val = tensor([0, 2, 1])]; tensor squeeze_32_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2310690560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2312263488))))[name = string("squeeze_32_palettized")]; string var_18048_pad_type_0 = const()[name = string("op_18048_pad_type_0"), val = string("valid")]; int32 var_18048_groups_0 = const()[name = string("op_18048_groups_0"), val = int32(1)]; tensor var_18048_strides_0 = const()[name = string("op_18048_strides_0"), val = tensor([1])]; tensor var_18048_pad_0 = const()[name = string("op_18048_pad_0"), val = tensor([0, 0])]; tensor var_18048_dilations_0 = const()[name = string("op_18048_dilations_0"), val = tensor([1])]; tensor var_18033 = transpose(perm = var_18032, x = attn_output_195)[name = string("transpose_19")]; tensor var_18048 = conv(dilations = var_18048_dilations_0, groups = var_18048_groups_0, pad = var_18048_pad_0, pad_type = var_18048_pad_type_0, strides = var_18048_strides_0, weight = squeeze_32_palettized, x = var_18033)[name = string("op_18048")]; tensor var_18052 = const()[name = string("op_18052"), val = tensor([0, 2, 1])]; int32 var_18058 = const()[name = string("op_18058"), val = int32(-1)]; fp16 const_564_promoted_to_fp16 = const()[name = string("const_564_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_1005 = transpose(perm = var_18052, x = var_18048)[name = string("transpose_18")]; tensor var_18064_cast_fp16 = mul(x = x_1005, y = const_564_promoted_to_fp16)[name = string("op_18064_cast_fp16")]; bool input_927_interleave_0 = const()[name = string("input_927_interleave_0"), val = bool(false)]; tensor input_927_cast_fp16 = concat(axis = var_18058, interleave = input_927_interleave_0, values = (x_1005, var_18064_cast_fp16))[name = string("input_927_cast_fp16")]; tensor normed_977_axes_0 = const()[name = string("normed_977_axes_0"), val = tensor([-1])]; fp16 var_18056_to_fp16 = const()[name = string("op_18056_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_977_cast_fp16 = layer_norm(axes = normed_977_axes_0, epsilon = var_18056_to_fp16, x = input_927_cast_fp16)[name = string("normed_977_cast_fp16")]; tensor var_18069_split_sizes_0 = const()[name = string("op_18069_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_18069_axis_0 = const()[name = string("op_18069_axis_0"), val = int32(-1)]; tensor var_18069_cast_fp16_0, tensor var_18069_cast_fp16_1 = split(axis = var_18069_axis_0, split_sizes = var_18069_split_sizes_0, x = normed_977_cast_fp16)[name = string("op_18069_cast_fp16")]; tensor const_565_to_fp16 = const()[name = string("const_565_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2312265088)))]; tensor var_18072_cast_fp16 = mul(x = var_18069_cast_fp16_0, y = const_565_to_fp16)[name = string("op_18072_cast_fp16")]; tensor x_1009_cast_fp16 = add(x = x_997_cast_fp16, y = var_18072_cast_fp16)[name = string("x_1009_cast_fp16")]; int32 var_18079 = const()[name = string("op_18079"), val = int32(-1)]; fp16 const_566_promoted_to_fp16 = const()[name = string("const_566_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_18085_cast_fp16 = mul(x = x_1009_cast_fp16, y = const_566_promoted_to_fp16)[name = string("op_18085_cast_fp16")]; bool input_929_interleave_0 = const()[name = string("input_929_interleave_0"), val = bool(false)]; tensor input_929_cast_fp16 = concat(axis = var_18079, interleave = input_929_interleave_0, values = (x_1009_cast_fp16, var_18085_cast_fp16))[name = string("input_929_cast_fp16")]; tensor normed_981_axes_0 = const()[name = string("normed_981_axes_0"), val = tensor([-1])]; fp16 var_18077_to_fp16 = const()[name = string("op_18077_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_981_cast_fp16 = layer_norm(axes = normed_981_axes_0, epsilon = var_18077_to_fp16, x = input_929_cast_fp16)[name = string("normed_981_cast_fp16")]; tensor var_18090_split_sizes_0 = const()[name = string("op_18090_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_18090_axis_0 = const()[name = string("op_18090_axis_0"), val = int32(-1)]; tensor var_18090_cast_fp16_0, tensor var_18090_cast_fp16_1 = split(axis = var_18090_axis_0, split_sizes = var_18090_split_sizes_0, x = normed_981_cast_fp16)[name = string("op_18090_cast_fp16")]; tensor const_567_to_fp16 = const()[name = string("const_567_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2312268224)))]; tensor var_18093_cast_fp16 = mul(x = var_18090_cast_fp16_0, y = const_567_to_fp16)[name = string("op_18093_cast_fp16")]; tensor var_18106 = const()[name = string("op_18106"), val = tensor([0, 2, 1])]; tensor input_931_axes_0 = const()[name = string("input_931_axes_0"), val = tensor([2])]; tensor var_18107 = transpose(perm = var_18106, x = var_18093_cast_fp16)[name = string("transpose_17")]; tensor input_931 = expand_dims(axes = input_931_axes_0, x = var_18107)[name = string("input_931")]; string gate_129_pad_type_0 = const()[name = string("gate_129_pad_type_0"), val = string("valid")]; tensor gate_129_strides_0 = const()[name = string("gate_129_strides_0"), val = tensor([1, 1])]; tensor gate_129_pad_0 = const()[name = string("gate_129_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_129_dilations_0 = const()[name = string("gate_129_dilations_0"), val = tensor([1, 1])]; int32 gate_129_groups_0 = const()[name = string("gate_129_groups_0"), val = int32(1)]; tensor gate_129 = conv(dilations = gate_129_dilations_0, groups = gate_129_groups_0, pad = gate_129_pad_0, pad_type = gate_129_pad_type_0, strides = gate_129_strides_0, weight = layers_32_mlp_gate_proj_weight_palettized, x = input_931)[name = string("gate_129")]; string up_65_pad_type_0 = const()[name = string("up_65_pad_type_0"), val = string("valid")]; tensor up_65_strides_0 = const()[name = string("up_65_strides_0"), val = tensor([1, 1])]; tensor up_65_pad_0 = const()[name = string("up_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_65_dilations_0 = const()[name = string("up_65_dilations_0"), val = tensor([1, 1])]; int32 up_65_groups_0 = const()[name = string("up_65_groups_0"), val = int32(1)]; tensor up_65 = conv(dilations = up_65_dilations_0, groups = up_65_groups_0, pad = up_65_pad_0, pad_type = up_65_pad_type_0, strides = up_65_strides_0, weight = layers_32_mlp_up_proj_weight_palettized, x = input_931)[name = string("up_65")]; string gate_131_mode_0 = const()[name = string("gate_131_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_131 = gelu(mode = gate_131_mode_0, x = gate_129)[name = string("gate_131")]; tensor input_933 = mul(x = gate_131, y = up_65)[name = string("input_933")]; string mlp_out_65_pad_type_0 = const()[name = string("mlp_out_65_pad_type_0"), val = string("valid")]; tensor mlp_out_65_strides_0 = const()[name = string("mlp_out_65_strides_0"), val = tensor([1, 1])]; tensor mlp_out_65_pad_0 = const()[name = string("mlp_out_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_65_dilations_0 = const()[name = string("mlp_out_65_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_65_groups_0 = const()[name = string("mlp_out_65_groups_0"), val = int32(1)]; tensor mlp_out_65 = conv(dilations = mlp_out_65_dilations_0, groups = mlp_out_65_groups_0, pad = mlp_out_65_pad_0, pad_type = mlp_out_65_pad_type_0, strides = mlp_out_65_strides_0, weight = layers_32_mlp_down_proj_weight_palettized, x = input_933)[name = string("mlp_out_65")]; tensor var_18147_axes_0 = const()[name = string("op_18147_axes_0"), val = tensor([2])]; tensor var_18147 = squeeze(axes = var_18147_axes_0, x = mlp_out_65)[name = string("op_18147")]; tensor var_18151 = const()[name = string("op_18151"), val = tensor([0, 2, 1])]; int32 var_18157 = const()[name = string("op_18157"), val = int32(-1)]; fp16 const_568_promoted_to_fp16 = const()[name = string("const_568_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_1013 = transpose(perm = var_18151, x = var_18147)[name = string("transpose_16")]; tensor var_18163_cast_fp16 = mul(x = x_1013, y = const_568_promoted_to_fp16)[name = string("op_18163_cast_fp16")]; bool input_935_interleave_0 = const()[name = string("input_935_interleave_0"), val = bool(false)]; tensor input_935_cast_fp16 = concat(axis = var_18157, interleave = input_935_interleave_0, values = (x_1013, var_18163_cast_fp16))[name = string("input_935_cast_fp16")]; tensor normed_985_axes_0 = const()[name = string("normed_985_axes_0"), val = tensor([-1])]; fp16 var_18155_to_fp16 = const()[name = string("op_18155_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_985_cast_fp16 = layer_norm(axes = normed_985_axes_0, epsilon = var_18155_to_fp16, x = input_935_cast_fp16)[name = string("normed_985_cast_fp16")]; tensor var_18168_split_sizes_0 = const()[name = string("op_18168_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_18168_axis_0 = const()[name = string("op_18168_axis_0"), val = int32(-1)]; tensor var_18168_cast_fp16_0, tensor var_18168_cast_fp16_1 = split(axis = var_18168_axis_0, split_sizes = var_18168_split_sizes_0, x = normed_985_cast_fp16)[name = string("op_18168_cast_fp16")]; tensor const_569_to_fp16 = const()[name = string("const_569_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2312271360)))]; tensor var_18171_cast_fp16 = mul(x = var_18168_cast_fp16_0, y = const_569_to_fp16)[name = string("op_18171_cast_fp16")]; tensor hidden_states_397_cast_fp16 = add(x = x_1009_cast_fp16, y = var_18171_cast_fp16)[name = string("hidden_states_397_cast_fp16")]; tensor per_layer_slice_65_begin_0 = const()[name = string("per_layer_slice_65_begin_0"), val = tensor([0, 0, 8192])]; tensor per_layer_slice_65_end_0 = const()[name = string("per_layer_slice_65_end_0"), val = tensor([1, 1, 8448])]; tensor per_layer_slice_65_end_mask_0 = const()[name = string("per_layer_slice_65_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_65 = slice_by_index(begin = per_layer_slice_65_begin_0, end = per_layer_slice_65_end_0, end_mask = per_layer_slice_65_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_65")]; tensor gated_129 = linear(bias = linear_1_bias_0, weight = layers_32_per_layer_input_gate_weight_palettized, x = hidden_states_397_cast_fp16)[name = string("linear_65")]; string gated_131_mode_0 = const()[name = string("gated_131_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_131 = gelu(mode = gated_131_mode_0, x = gated_129)[name = string("gated_131")]; tensor input_939 = mul(x = gated_131, y = per_layer_slice_65)[name = string("input_939")]; tensor x_1017 = linear(bias = linear_2_bias_0, weight = layers_32_per_layer_projection_weight_palettized, x = input_939)[name = string("linear_66")]; int32 var_18208 = const()[name = string("op_18208"), val = int32(-1)]; fp16 const_570_promoted_to_fp16 = const()[name = string("const_570_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_18214_cast_fp16 = mul(x = x_1017, y = const_570_promoted_to_fp16)[name = string("op_18214_cast_fp16")]; bool input_941_interleave_0 = const()[name = string("input_941_interleave_0"), val = bool(false)]; tensor input_941_cast_fp16 = concat(axis = var_18208, interleave = input_941_interleave_0, values = (x_1017, var_18214_cast_fp16))[name = string("input_941_cast_fp16")]; tensor normed_989_axes_0 = const()[name = string("normed_989_axes_0"), val = tensor([-1])]; fp16 var_18206_to_fp16 = const()[name = string("op_18206_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_989_cast_fp16 = layer_norm(axes = normed_989_axes_0, epsilon = var_18206_to_fp16, x = input_941_cast_fp16)[name = string("normed_989_cast_fp16")]; tensor var_18219_split_sizes_0 = const()[name = string("op_18219_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_18219_axis_0 = const()[name = string("op_18219_axis_0"), val = int32(-1)]; tensor var_18219_cast_fp16_0, tensor var_18219_cast_fp16_1 = split(axis = var_18219_axis_0, split_sizes = var_18219_split_sizes_0, x = normed_989_cast_fp16)[name = string("op_18219_cast_fp16")]; tensor const_571_to_fp16 = const()[name = string("const_571_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2312274496)))]; tensor var_18222_cast_fp16 = mul(x = var_18219_cast_fp16_0, y = const_571_to_fp16)[name = string("op_18222_cast_fp16")]; tensor hidden_states_401_cast_fp16 = add(x = hidden_states_397_cast_fp16, y = var_18222_cast_fp16)[name = string("hidden_states_401_cast_fp16")]; tensor layers_32_layer_scalar_to_fp16 = const()[name = string("layers_32_layer_scalar_to_fp16"), val = tensor([0x1.bep-1])]; tensor x_1021_cast_fp16 = mul(x = hidden_states_401_cast_fp16, y = layers_32_layer_scalar_to_fp16)[name = string("x_1021_cast_fp16")]; int32 var_18230 = const()[name = string("op_18230"), val = int32(-1)]; fp16 const_572_promoted_to_fp16 = const()[name = string("const_572_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_18236_cast_fp16 = mul(x = x_1021_cast_fp16, y = const_572_promoted_to_fp16)[name = string("op_18236_cast_fp16")]; bool input_943_interleave_0 = const()[name = string("input_943_interleave_0"), val = bool(false)]; tensor input_943_cast_fp16 = concat(axis = var_18230, interleave = input_943_interleave_0, values = (x_1021_cast_fp16, var_18236_cast_fp16))[name = string("input_943_cast_fp16")]; tensor normed_993_axes_0 = const()[name = string("normed_993_axes_0"), val = tensor([-1])]; fp16 var_18228_to_fp16 = const()[name = string("op_18228_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_993_cast_fp16 = layer_norm(axes = normed_993_axes_0, epsilon = var_18228_to_fp16, x = input_943_cast_fp16)[name = string("normed_993_cast_fp16")]; tensor var_18241_split_sizes_0 = const()[name = string("op_18241_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_18241_axis_0 = const()[name = string("op_18241_axis_0"), val = int32(-1)]; tensor var_18241_cast_fp16_0, tensor var_18241_cast_fp16_1 = split(axis = var_18241_axis_0, split_sizes = var_18241_split_sizes_0, x = normed_993_cast_fp16)[name = string("op_18241_cast_fp16")]; tensor const_573_to_fp16 = const()[name = string("const_573_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2312277632)))]; tensor var_18244_cast_fp16 = mul(x = var_18241_cast_fp16_0, y = const_573_to_fp16)[name = string("op_18244_cast_fp16")]; tensor var_18252 = const()[name = string("op_18252"), val = tensor([0, 2, 1])]; tensor var_18255_axes_0 = const()[name = string("op_18255_axes_0"), val = tensor([2])]; tensor var_18253_cast_fp16 = transpose(perm = var_18252, x = var_18244_cast_fp16)[name = string("transpose_15")]; tensor var_18255_cast_fp16 = expand_dims(axes = var_18255_axes_0, x = var_18253_cast_fp16)[name = string("op_18255_cast_fp16")]; string var_18271_pad_type_0 = const()[name = string("op_18271_pad_type_0"), val = string("valid")]; tensor var_18271_strides_0 = const()[name = string("op_18271_strides_0"), val = tensor([1, 1])]; tensor var_18271_pad_0 = const()[name = string("op_18271_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_18271_dilations_0 = const()[name = string("op_18271_dilations_0"), val = tensor([1, 1])]; int32 var_18271_groups_0 = const()[name = string("op_18271_groups_0"), val = int32(1)]; tensor var_18271 = conv(dilations = var_18271_dilations_0, groups = var_18271_groups_0, pad = var_18271_pad_0, pad_type = var_18271_pad_type_0, strides = var_18271_strides_0, weight = layers_33_self_attn_q_proj_weight_palettized, x = var_18255_cast_fp16)[name = string("op_18271")]; tensor var_18276 = const()[name = string("op_18276"), val = tensor([1, 8, 256, 1])]; tensor var_18277 = reshape(shape = var_18276, x = var_18271)[name = string("op_18277")]; tensor var_18282 = const()[name = string("op_18282"), val = tensor([0, 1, 3, 2])]; tensor var_18292 = const()[name = string("op_18292"), val = tensor([1, 8, 256])]; tensor var_18283 = transpose(perm = var_18282, x = var_18277)[name = string("transpose_14")]; tensor x_1025 = reshape(shape = var_18292, x = var_18283)[name = string("x_1025")]; int32 var_18298 = const()[name = string("op_18298"), val = int32(-1)]; fp16 const_574_promoted_to_fp16 = const()[name = string("const_574_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_18304_cast_fp16 = mul(x = x_1025, y = const_574_promoted_to_fp16)[name = string("op_18304_cast_fp16")]; bool input_947_interleave_0 = const()[name = string("input_947_interleave_0"), val = bool(false)]; tensor input_947_cast_fp16 = concat(axis = var_18298, interleave = input_947_interleave_0, values = (x_1025, var_18304_cast_fp16))[name = string("input_947_cast_fp16")]; tensor normed_997_axes_0 = const()[name = string("normed_997_axes_0"), val = tensor([-1])]; fp16 var_18296_to_fp16 = const()[name = string("op_18296_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_997_cast_fp16 = layer_norm(axes = normed_997_axes_0, epsilon = var_18296_to_fp16, x = input_947_cast_fp16)[name = string("normed_997_cast_fp16")]; tensor var_18309_split_sizes_0 = const()[name = string("op_18309_split_sizes_0"), val = tensor([256, 256])]; int32 var_18309_axis_0 = const()[name = string("op_18309_axis_0"), val = int32(-1)]; tensor var_18309_cast_fp16_0, tensor var_18309_cast_fp16_1 = split(axis = var_18309_axis_0, split_sizes = var_18309_split_sizes_0, x = normed_997_cast_fp16)[name = string("op_18309_cast_fp16")]; tensor var_18312_cast_fp16 = mul(x = var_18309_cast_fp16_0, y = const_307_to_fp16)[name = string("op_18312_cast_fp16")]; tensor var_18318 = const()[name = string("op_18318"), val = tensor([1, 8, 1, 256])]; tensor q_231 = reshape(shape = var_18318, x = var_18312_cast_fp16)[name = string("q_231")]; tensor var_18320 = mul(x = q_231, y = cos_1)[name = string("op_18320")]; tensor var_18321_split_sizes_0 = const()[name = string("op_18321_split_sizes_0"), val = tensor([128, 128])]; int32 var_18321_axis_0 = const()[name = string("op_18321_axis_0"), val = int32(-1)]; tensor var_18321_0, tensor var_18321_1 = split(axis = var_18321_axis_0, split_sizes = var_18321_split_sizes_0, x = q_231)[name = string("op_18321")]; fp16 const_576_promoted = const()[name = string("const_576_promoted"), val = fp16(-0x1p+0)]; tensor var_18323 = mul(x = var_18321_1, y = const_576_promoted)[name = string("op_18323")]; int32 var_18325 = const()[name = string("op_18325"), val = int32(-1)]; bool var_18326_interleave_0 = const()[name = string("op_18326_interleave_0"), val = bool(false)]; tensor var_18326 = concat(axis = var_18325, interleave = var_18326_interleave_0, values = (var_18323, var_18321_0))[name = string("op_18326")]; tensor var_18327 = mul(x = var_18326, y = sin_1)[name = string("op_18327")]; tensor q_233 = add(x = var_18320, y = var_18327)[name = string("q_233")]; bool var_18351_transpose_x_0 = const()[name = string("op_18351_transpose_x_0"), val = bool(false)]; bool var_18351_transpose_y_0 = const()[name = string("op_18351_transpose_y_0"), val = bool(false)]; tensor var_18351_cast_fp16 = matmul(transpose_x = var_18351_transpose_x_0, transpose_y = var_18351_transpose_y_0, x = q_233, y = transpose_153_cast_fp16)[name = string("op_18351_cast_fp16")]; tensor attn_weights_201_cast_fp16 = add(x = var_18351_cast_fp16, y = causal_mask)[name = string("attn_weights_201_cast_fp16")]; int32 var_18361 = const()[name = string("op_18361"), val = int32(-1)]; tensor var_18363_cast_fp16 = softmax(axis = var_18361, x = attn_weights_201_cast_fp16)[name = string("op_18363_cast_fp16")]; bool var_18379_transpose_x_0 = const()[name = string("op_18379_transpose_x_0"), val = bool(false)]; bool var_18379_transpose_y_0 = const()[name = string("op_18379_transpose_y_0"), val = bool(false)]; tensor var_18379_cast_fp16 = matmul(transpose_x = var_18379_transpose_x_0, transpose_y = var_18379_transpose_y_0, x = var_18363_cast_fp16, y = V_expanded_27_cast_fp16)[name = string("op_18379_cast_fp16")]; tensor var_18389 = const()[name = string("op_18389"), val = tensor([0, 2, 1, 3])]; tensor var_18396 = const()[name = string("op_18396"), val = tensor([1, 1, -1])]; tensor var_18390 = transpose(perm = var_18389, x = var_18379_cast_fp16)[name = string("transpose_13")]; tensor attn_output_201 = reshape(shape = var_18396, x = var_18390)[name = string("attn_output_201")]; tensor var_18401 = const()[name = string("op_18401"), val = tensor([0, 2, 1])]; tensor squeeze_33_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2312280768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2313853696))))[name = string("squeeze_33_palettized")]; string var_18417_pad_type_0 = const()[name = string("op_18417_pad_type_0"), val = string("valid")]; int32 var_18417_groups_0 = const()[name = string("op_18417_groups_0"), val = int32(1)]; tensor var_18417_strides_0 = const()[name = string("op_18417_strides_0"), val = tensor([1])]; tensor var_18417_pad_0 = const()[name = string("op_18417_pad_0"), val = tensor([0, 0])]; tensor var_18417_dilations_0 = const()[name = string("op_18417_dilations_0"), val = tensor([1])]; tensor var_18402 = transpose(perm = var_18401, x = attn_output_201)[name = string("transpose_12")]; tensor var_18417 = conv(dilations = var_18417_dilations_0, groups = var_18417_groups_0, pad = var_18417_pad_0, pad_type = var_18417_pad_type_0, strides = var_18417_strides_0, weight = squeeze_33_palettized, x = var_18402)[name = string("op_18417")]; tensor var_18421 = const()[name = string("op_18421"), val = tensor([0, 2, 1])]; int32 var_18427 = const()[name = string("op_18427"), val = int32(-1)]; fp16 const_577_promoted_to_fp16 = const()[name = string("const_577_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_1029 = transpose(perm = var_18421, x = var_18417)[name = string("transpose_11")]; tensor var_18433_cast_fp16 = mul(x = x_1029, y = const_577_promoted_to_fp16)[name = string("op_18433_cast_fp16")]; bool input_951_interleave_0 = const()[name = string("input_951_interleave_0"), val = bool(false)]; tensor input_951_cast_fp16 = concat(axis = var_18427, interleave = input_951_interleave_0, values = (x_1029, var_18433_cast_fp16))[name = string("input_951_cast_fp16")]; tensor normed_1001_axes_0 = const()[name = string("normed_1001_axes_0"), val = tensor([-1])]; fp16 var_18425_to_fp16 = const()[name = string("op_18425_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_1001_cast_fp16 = layer_norm(axes = normed_1001_axes_0, epsilon = var_18425_to_fp16, x = input_951_cast_fp16)[name = string("normed_1001_cast_fp16")]; tensor var_18438_split_sizes_0 = const()[name = string("op_18438_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_18438_axis_0 = const()[name = string("op_18438_axis_0"), val = int32(-1)]; tensor var_18438_cast_fp16_0, tensor var_18438_cast_fp16_1 = split(axis = var_18438_axis_0, split_sizes = var_18438_split_sizes_0, x = normed_1001_cast_fp16)[name = string("op_18438_cast_fp16")]; tensor const_578_to_fp16 = const()[name = string("const_578_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2313855296)))]; tensor var_18441_cast_fp16 = mul(x = var_18438_cast_fp16_0, y = const_578_to_fp16)[name = string("op_18441_cast_fp16")]; tensor x_1033_cast_fp16 = add(x = x_1021_cast_fp16, y = var_18441_cast_fp16)[name = string("x_1033_cast_fp16")]; int32 var_18448 = const()[name = string("op_18448"), val = int32(-1)]; fp16 const_579_promoted_to_fp16 = const()[name = string("const_579_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_18454_cast_fp16 = mul(x = x_1033_cast_fp16, y = const_579_promoted_to_fp16)[name = string("op_18454_cast_fp16")]; bool input_953_interleave_0 = const()[name = string("input_953_interleave_0"), val = bool(false)]; tensor input_953_cast_fp16 = concat(axis = var_18448, interleave = input_953_interleave_0, values = (x_1033_cast_fp16, var_18454_cast_fp16))[name = string("input_953_cast_fp16")]; tensor normed_1005_axes_0 = const()[name = string("normed_1005_axes_0"), val = tensor([-1])]; fp16 var_18446_to_fp16 = const()[name = string("op_18446_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_1005_cast_fp16 = layer_norm(axes = normed_1005_axes_0, epsilon = var_18446_to_fp16, x = input_953_cast_fp16)[name = string("normed_1005_cast_fp16")]; tensor var_18459_split_sizes_0 = const()[name = string("op_18459_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_18459_axis_0 = const()[name = string("op_18459_axis_0"), val = int32(-1)]; tensor var_18459_cast_fp16_0, tensor var_18459_cast_fp16_1 = split(axis = var_18459_axis_0, split_sizes = var_18459_split_sizes_0, x = normed_1005_cast_fp16)[name = string("op_18459_cast_fp16")]; tensor const_580_to_fp16 = const()[name = string("const_580_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2313858432)))]; tensor var_18462_cast_fp16 = mul(x = var_18459_cast_fp16_0, y = const_580_to_fp16)[name = string("op_18462_cast_fp16")]; tensor var_18475 = const()[name = string("op_18475"), val = tensor([0, 2, 1])]; tensor input_955_axes_0 = const()[name = string("input_955_axes_0"), val = tensor([2])]; tensor var_18476 = transpose(perm = var_18475, x = var_18462_cast_fp16)[name = string("transpose_10")]; tensor input_955 = expand_dims(axes = input_955_axes_0, x = var_18476)[name = string("input_955")]; string gate_133_pad_type_0 = const()[name = string("gate_133_pad_type_0"), val = string("valid")]; tensor gate_133_strides_0 = const()[name = string("gate_133_strides_0"), val = tensor([1, 1])]; tensor gate_133_pad_0 = const()[name = string("gate_133_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_133_dilations_0 = const()[name = string("gate_133_dilations_0"), val = tensor([1, 1])]; int32 gate_133_groups_0 = const()[name = string("gate_133_groups_0"), val = int32(1)]; tensor gate_133 = conv(dilations = gate_133_dilations_0, groups = gate_133_groups_0, pad = gate_133_pad_0, pad_type = gate_133_pad_type_0, strides = gate_133_strides_0, weight = layers_33_mlp_gate_proj_weight_palettized, x = input_955)[name = string("gate_133")]; string up_67_pad_type_0 = const()[name = string("up_67_pad_type_0"), val = string("valid")]; tensor up_67_strides_0 = const()[name = string("up_67_strides_0"), val = tensor([1, 1])]; tensor up_67_pad_0 = const()[name = string("up_67_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_67_dilations_0 = const()[name = string("up_67_dilations_0"), val = tensor([1, 1])]; int32 up_67_groups_0 = const()[name = string("up_67_groups_0"), val = int32(1)]; tensor up_67 = conv(dilations = up_67_dilations_0, groups = up_67_groups_0, pad = up_67_pad_0, pad_type = up_67_pad_type_0, strides = up_67_strides_0, weight = layers_33_mlp_up_proj_weight_palettized, x = input_955)[name = string("up_67")]; string gate_135_mode_0 = const()[name = string("gate_135_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate_135 = gelu(mode = gate_135_mode_0, x = gate_133)[name = string("gate_135")]; tensor input_957 = mul(x = gate_135, y = up_67)[name = string("input_957")]; string mlp_out_67_pad_type_0 = const()[name = string("mlp_out_67_pad_type_0"), val = string("valid")]; tensor mlp_out_67_strides_0 = const()[name = string("mlp_out_67_strides_0"), val = tensor([1, 1])]; tensor mlp_out_67_pad_0 = const()[name = string("mlp_out_67_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_67_dilations_0 = const()[name = string("mlp_out_67_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_67_groups_0 = const()[name = string("mlp_out_67_groups_0"), val = int32(1)]; tensor mlp_out_67 = conv(dilations = mlp_out_67_dilations_0, groups = mlp_out_67_groups_0, pad = mlp_out_67_pad_0, pad_type = mlp_out_67_pad_type_0, strides = mlp_out_67_strides_0, weight = layers_33_mlp_down_proj_weight_palettized, x = input_957)[name = string("mlp_out_67")]; tensor var_18516_axes_0 = const()[name = string("op_18516_axes_0"), val = tensor([2])]; tensor var_18516 = squeeze(axes = var_18516_axes_0, x = mlp_out_67)[name = string("op_18516")]; tensor var_18520 = const()[name = string("op_18520"), val = tensor([0, 2, 1])]; int32 var_18526 = const()[name = string("op_18526"), val = int32(-1)]; fp16 const_581_promoted_to_fp16 = const()[name = string("const_581_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_1037 = transpose(perm = var_18520, x = var_18516)[name = string("transpose_9")]; tensor var_18532_cast_fp16 = mul(x = x_1037, y = const_581_promoted_to_fp16)[name = string("op_18532_cast_fp16")]; bool input_959_interleave_0 = const()[name = string("input_959_interleave_0"), val = bool(false)]; tensor input_959_cast_fp16 = concat(axis = var_18526, interleave = input_959_interleave_0, values = (x_1037, var_18532_cast_fp16))[name = string("input_959_cast_fp16")]; tensor normed_1009_axes_0 = const()[name = string("normed_1009_axes_0"), val = tensor([-1])]; fp16 var_18524_to_fp16 = const()[name = string("op_18524_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_1009_cast_fp16 = layer_norm(axes = normed_1009_axes_0, epsilon = var_18524_to_fp16, x = input_959_cast_fp16)[name = string("normed_1009_cast_fp16")]; tensor var_18537_split_sizes_0 = const()[name = string("op_18537_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_18537_axis_0 = const()[name = string("op_18537_axis_0"), val = int32(-1)]; tensor var_18537_cast_fp16_0, tensor var_18537_cast_fp16_1 = split(axis = var_18537_axis_0, split_sizes = var_18537_split_sizes_0, x = normed_1009_cast_fp16)[name = string("op_18537_cast_fp16")]; tensor const_582_to_fp16 = const()[name = string("const_582_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2313861568)))]; tensor var_18540_cast_fp16 = mul(x = var_18537_cast_fp16_0, y = const_582_to_fp16)[name = string("op_18540_cast_fp16")]; tensor hidden_states_409_cast_fp16 = add(x = x_1033_cast_fp16, y = var_18540_cast_fp16)[name = string("hidden_states_409_cast_fp16")]; tensor per_layer_slice_67_begin_0 = const()[name = string("per_layer_slice_67_begin_0"), val = tensor([0, 0, 8448])]; tensor per_layer_slice_67_end_0 = const()[name = string("per_layer_slice_67_end_0"), val = tensor([1, 1, 8704])]; tensor per_layer_slice_67_end_mask_0 = const()[name = string("per_layer_slice_67_end_mask_0"), val = tensor([true, true, false])]; tensor per_layer_slice_67 = slice_by_index(begin = per_layer_slice_67_begin_0, end = per_layer_slice_67_end_0, end_mask = per_layer_slice_67_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_67")]; tensor gated_133 = linear(bias = linear_1_bias_0, weight = layers_33_per_layer_input_gate_weight_palettized, x = hidden_states_409_cast_fp16)[name = string("linear_67")]; string gated_135_mode_0 = const()[name = string("gated_135_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated_135 = gelu(mode = gated_135_mode_0, x = gated_133)[name = string("gated_135")]; tensor input_963 = mul(x = gated_135, y = per_layer_slice_67)[name = string("input_963")]; tensor x_1041 = linear(bias = linear_2_bias_0, weight = layers_33_per_layer_projection_weight_palettized, x = input_963)[name = string("linear_68")]; int32 var_18577 = const()[name = string("op_18577"), val = int32(-1)]; fp16 const_583_promoted_to_fp16 = const()[name = string("const_583_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_18583_cast_fp16 = mul(x = x_1041, y = const_583_promoted_to_fp16)[name = string("op_18583_cast_fp16")]; bool input_965_interleave_0 = const()[name = string("input_965_interleave_0"), val = bool(false)]; tensor input_965_cast_fp16 = concat(axis = var_18577, interleave = input_965_interleave_0, values = (x_1041, var_18583_cast_fp16))[name = string("input_965_cast_fp16")]; tensor normed_1013_axes_0 = const()[name = string("normed_1013_axes_0"), val = tensor([-1])]; fp16 var_18575_to_fp16 = const()[name = string("op_18575_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_1013_cast_fp16 = layer_norm(axes = normed_1013_axes_0, epsilon = var_18575_to_fp16, x = input_965_cast_fp16)[name = string("normed_1013_cast_fp16")]; tensor var_18588_split_sizes_0 = const()[name = string("op_18588_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_18588_axis_0 = const()[name = string("op_18588_axis_0"), val = int32(-1)]; tensor var_18588_cast_fp16_0, tensor var_18588_cast_fp16_1 = split(axis = var_18588_axis_0, split_sizes = var_18588_split_sizes_0, x = normed_1013_cast_fp16)[name = string("op_18588_cast_fp16")]; tensor const_584_to_fp16 = const()[name = string("const_584_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2313864704)))]; tensor var_18591_cast_fp16 = mul(x = var_18588_cast_fp16_0, y = const_584_to_fp16)[name = string("op_18591_cast_fp16")]; tensor hidden_states_413_cast_fp16 = add(x = hidden_states_409_cast_fp16, y = var_18591_cast_fp16)[name = string("hidden_states_413_cast_fp16")]; tensor layers_33_layer_scalar_to_fp16 = const()[name = string("layers_33_layer_scalar_to_fp16"), val = tensor([0x1.64p-1])]; tensor x_1045_cast_fp16 = mul(x = hidden_states_413_cast_fp16, y = layers_33_layer_scalar_to_fp16)[name = string("x_1045_cast_fp16")]; int32 var_18599 = const()[name = string("op_18599"), val = int32(-1)]; fp16 const_585_promoted_to_fp16 = const()[name = string("const_585_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_18605_cast_fp16 = mul(x = x_1045_cast_fp16, y = const_585_promoted_to_fp16)[name = string("op_18605_cast_fp16")]; bool input_967_interleave_0 = const()[name = string("input_967_interleave_0"), val = bool(false)]; tensor input_967_cast_fp16 = concat(axis = var_18599, interleave = input_967_interleave_0, values = (x_1045_cast_fp16, var_18605_cast_fp16))[name = string("input_967_cast_fp16")]; tensor normed_1017_axes_0 = const()[name = string("normed_1017_axes_0"), val = tensor([-1])]; fp16 var_18597_to_fp16 = const()[name = string("op_18597_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_1017_cast_fp16 = layer_norm(axes = normed_1017_axes_0, epsilon = var_18597_to_fp16, x = input_967_cast_fp16)[name = string("normed_1017_cast_fp16")]; tensor var_18610_split_sizes_0 = const()[name = string("op_18610_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_18610_axis_0 = const()[name = string("op_18610_axis_0"), val = int32(-1)]; tensor var_18610_cast_fp16_0, tensor var_18610_cast_fp16_1 = split(axis = var_18610_axis_0, split_sizes = var_18610_split_sizes_0, x = normed_1017_cast_fp16)[name = string("op_18610_cast_fp16")]; tensor const_586_to_fp16 = const()[name = string("const_586_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2313867840)))]; tensor var_18613_cast_fp16 = mul(x = var_18610_cast_fp16_0, y = const_586_to_fp16)[name = string("op_18613_cast_fp16")]; tensor var_18621 = const()[name = string("op_18621"), val = tensor([0, 2, 1])]; tensor var_18624_axes_0 = const()[name = string("op_18624_axes_0"), val = tensor([2])]; tensor var_18622_cast_fp16 = transpose(perm = var_18621, x = var_18613_cast_fp16)[name = string("transpose_8")]; tensor var_18624_cast_fp16 = expand_dims(axes = var_18624_axes_0, x = var_18622_cast_fp16)[name = string("op_18624_cast_fp16")]; string var_18640_pad_type_0 = const()[name = string("op_18640_pad_type_0"), val = string("valid")]; tensor var_18640_strides_0 = const()[name = string("op_18640_strides_0"), val = tensor([1, 1])]; tensor var_18640_pad_0 = const()[name = string("op_18640_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_18640_dilations_0 = const()[name = string("op_18640_dilations_0"), val = tensor([1, 1])]; int32 var_18640_groups_0 = const()[name = string("op_18640_groups_0"), val = int32(1)]; tensor var_18640 = conv(dilations = var_18640_dilations_0, groups = var_18640_groups_0, pad = var_18640_pad_0, pad_type = var_18640_pad_type_0, strides = var_18640_strides_0, weight = layers_34_self_attn_q_proj_weight_palettized, x = var_18624_cast_fp16)[name = string("op_18640")]; tensor var_18645 = const()[name = string("op_18645"), val = tensor([1, 8, 512, 1])]; tensor var_18646 = reshape(shape = var_18645, x = var_18640)[name = string("op_18646")]; tensor var_18651 = const()[name = string("op_18651"), val = tensor([0, 1, 3, 2])]; tensor var_18661 = const()[name = string("op_18661"), val = tensor([1, 8, 512])]; tensor var_18652 = transpose(perm = var_18651, x = var_18646)[name = string("transpose_7")]; tensor x_1049 = reshape(shape = var_18661, x = var_18652)[name = string("x_1049")]; int32 var_18667 = const()[name = string("op_18667"), val = int32(-1)]; fp16 const_587_promoted_to_fp16 = const()[name = string("const_587_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_18673_cast_fp16 = mul(x = x_1049, y = const_587_promoted_to_fp16)[name = string("op_18673_cast_fp16")]; bool input_971_interleave_0 = const()[name = string("input_971_interleave_0"), val = bool(false)]; tensor input_971_cast_fp16 = concat(axis = var_18667, interleave = input_971_interleave_0, values = (x_1049, var_18673_cast_fp16))[name = string("input_971_cast_fp16")]; tensor normed_1021_axes_0 = const()[name = string("normed_1021_axes_0"), val = tensor([-1])]; fp16 var_18665_to_fp16 = const()[name = string("op_18665_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_1021_cast_fp16 = layer_norm(axes = normed_1021_axes_0, epsilon = var_18665_to_fp16, x = input_971_cast_fp16)[name = string("normed_1021_cast_fp16")]; tensor var_18678_split_sizes_0 = const()[name = string("op_18678_split_sizes_0"), val = tensor([512, 512])]; int32 var_18678_axis_0 = const()[name = string("op_18678_axis_0"), val = int32(-1)]; tensor var_18678_cast_fp16_0, tensor var_18678_cast_fp16_1 = split(axis = var_18678_axis_0, split_sizes = var_18678_split_sizes_0, x = normed_1021_cast_fp16)[name = string("op_18678_cast_fp16")]; tensor var_18681_cast_fp16 = mul(x = var_18678_cast_fp16_0, y = const_325_to_fp16)[name = string("op_18681_cast_fp16")]; tensor var_18687 = const()[name = string("op_18687"), val = tensor([1, 8, 1, 512])]; tensor q_237 = reshape(shape = var_18687, x = var_18681_cast_fp16)[name = string("q_237")]; tensor var_18689 = mul(x = q_237, y = cos)[name = string("op_18689")]; tensor var_18690_split_sizes_0 = const()[name = string("op_18690_split_sizes_0"), val = tensor([256, 256])]; int32 var_18690_axis_0 = const()[name = string("op_18690_axis_0"), val = int32(-1)]; tensor var_18690_0, tensor var_18690_1 = split(axis = var_18690_axis_0, split_sizes = var_18690_split_sizes_0, x = q_237)[name = string("op_18690")]; fp16 const_589_promoted = const()[name = string("const_589_promoted"), val = fp16(-0x1p+0)]; tensor var_18692 = mul(x = var_18690_1, y = const_589_promoted)[name = string("op_18692")]; int32 var_18694 = const()[name = string("op_18694"), val = int32(-1)]; bool var_18695_interleave_0 = const()[name = string("op_18695_interleave_0"), val = bool(false)]; tensor var_18695 = concat(axis = var_18694, interleave = var_18695_interleave_0, values = (var_18692, var_18690_0))[name = string("op_18695")]; tensor var_18696 = mul(x = var_18695, y = sin)[name = string("op_18696")]; tensor q = add(x = var_18689, y = var_18696)[name = string("q")]; bool var_18720_transpose_x_0 = const()[name = string("op_18720_transpose_x_0"), val = bool(false)]; bool var_18720_transpose_y_0 = const()[name = string("op_18720_transpose_y_0"), val = bool(false)]; tensor var_18720_cast_fp16 = matmul(transpose_x = var_18720_transpose_x_0, transpose_y = var_18720_transpose_y_0, x = q, y = transpose_154_cast_fp16)[name = string("op_18720_cast_fp16")]; tensor attn_weights_207_cast_fp16 = add(x = var_18720_cast_fp16, y = causal_mask)[name = string("attn_weights_207_cast_fp16")]; int32 var_18730 = const()[name = string("op_18730"), val = int32(-1)]; tensor var_18732_cast_fp16 = softmax(axis = var_18730, x = attn_weights_207_cast_fp16)[name = string("op_18732_cast_fp16")]; bool var_18748_transpose_x_0 = const()[name = string("op_18748_transpose_x_0"), val = bool(false)]; bool var_18748_transpose_y_0 = const()[name = string("op_18748_transpose_y_0"), val = bool(false)]; tensor var_18748_cast_fp16 = matmul(transpose_x = var_18748_transpose_x_0, transpose_y = var_18748_transpose_y_0, x = var_18732_cast_fp16, y = V_expanded_29_cast_fp16)[name = string("op_18748_cast_fp16")]; tensor var_18758 = const()[name = string("op_18758"), val = tensor([0, 2, 1, 3])]; tensor var_18765 = const()[name = string("op_18765"), val = tensor([1, 1, -1])]; tensor var_18759 = transpose(perm = var_18758, x = var_18748_cast_fp16)[name = string("transpose_6")]; tensor attn_output_207 = reshape(shape = var_18765, x = var_18759)[name = string("attn_output_207")]; tensor var_18770 = const()[name = string("op_18770"), val = tensor([0, 2, 1])]; tensor squeeze_34_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2313870976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2317016768))))[name = string("squeeze_34_palettized")]; string var_18786_pad_type_0 = const()[name = string("op_18786_pad_type_0"), val = string("valid")]; int32 var_18786_groups_0 = const()[name = string("op_18786_groups_0"), val = int32(1)]; tensor var_18786_strides_0 = const()[name = string("op_18786_strides_0"), val = tensor([1])]; tensor var_18786_pad_0 = const()[name = string("op_18786_pad_0"), val = tensor([0, 0])]; tensor var_18786_dilations_0 = const()[name = string("op_18786_dilations_0"), val = tensor([1])]; tensor var_18771 = transpose(perm = var_18770, x = attn_output_207)[name = string("transpose_5")]; tensor var_18786 = conv(dilations = var_18786_dilations_0, groups = var_18786_groups_0, pad = var_18786_pad_0, pad_type = var_18786_pad_type_0, strides = var_18786_strides_0, weight = squeeze_34_palettized, x = var_18771)[name = string("op_18786")]; tensor var_18790 = const()[name = string("op_18790"), val = tensor([0, 2, 1])]; int32 var_18796 = const()[name = string("op_18796"), val = int32(-1)]; fp16 const_590_promoted_to_fp16 = const()[name = string("const_590_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_1053 = transpose(perm = var_18790, x = var_18786)[name = string("transpose_4")]; tensor var_18802_cast_fp16 = mul(x = x_1053, y = const_590_promoted_to_fp16)[name = string("op_18802_cast_fp16")]; bool input_975_interleave_0 = const()[name = string("input_975_interleave_0"), val = bool(false)]; tensor input_975_cast_fp16 = concat(axis = var_18796, interleave = input_975_interleave_0, values = (x_1053, var_18802_cast_fp16))[name = string("input_975_cast_fp16")]; tensor normed_1025_axes_0 = const()[name = string("normed_1025_axes_0"), val = tensor([-1])]; fp16 var_18794_to_fp16 = const()[name = string("op_18794_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_1025_cast_fp16 = layer_norm(axes = normed_1025_axes_0, epsilon = var_18794_to_fp16, x = input_975_cast_fp16)[name = string("normed_1025_cast_fp16")]; tensor var_18807_split_sizes_0 = const()[name = string("op_18807_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_18807_axis_0 = const()[name = string("op_18807_axis_0"), val = int32(-1)]; tensor var_18807_cast_fp16_0, tensor var_18807_cast_fp16_1 = split(axis = var_18807_axis_0, split_sizes = var_18807_split_sizes_0, x = normed_1025_cast_fp16)[name = string("op_18807_cast_fp16")]; tensor const_591_to_fp16 = const()[name = string("const_591_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2317018368)))]; tensor var_18810_cast_fp16 = mul(x = var_18807_cast_fp16_0, y = const_591_to_fp16)[name = string("op_18810_cast_fp16")]; tensor x_1057_cast_fp16 = add(x = x_1045_cast_fp16, y = var_18810_cast_fp16)[name = string("x_1057_cast_fp16")]; int32 var_18817 = const()[name = string("op_18817"), val = int32(-1)]; fp16 const_592_promoted_to_fp16 = const()[name = string("const_592_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_18823_cast_fp16 = mul(x = x_1057_cast_fp16, y = const_592_promoted_to_fp16)[name = string("op_18823_cast_fp16")]; bool input_977_interleave_0 = const()[name = string("input_977_interleave_0"), val = bool(false)]; tensor input_977_cast_fp16 = concat(axis = var_18817, interleave = input_977_interleave_0, values = (x_1057_cast_fp16, var_18823_cast_fp16))[name = string("input_977_cast_fp16")]; tensor normed_1029_axes_0 = const()[name = string("normed_1029_axes_0"), val = tensor([-1])]; fp16 var_18815_to_fp16 = const()[name = string("op_18815_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_1029_cast_fp16 = layer_norm(axes = normed_1029_axes_0, epsilon = var_18815_to_fp16, x = input_977_cast_fp16)[name = string("normed_1029_cast_fp16")]; tensor var_18828_split_sizes_0 = const()[name = string("op_18828_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_18828_axis_0 = const()[name = string("op_18828_axis_0"), val = int32(-1)]; tensor var_18828_cast_fp16_0, tensor var_18828_cast_fp16_1 = split(axis = var_18828_axis_0, split_sizes = var_18828_split_sizes_0, x = normed_1029_cast_fp16)[name = string("op_18828_cast_fp16")]; tensor const_593_to_fp16 = const()[name = string("const_593_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2317021504)))]; tensor var_18831_cast_fp16 = mul(x = var_18828_cast_fp16_0, y = const_593_to_fp16)[name = string("op_18831_cast_fp16")]; tensor var_18844 = const()[name = string("op_18844"), val = tensor([0, 2, 1])]; tensor input_979_axes_0 = const()[name = string("input_979_axes_0"), val = tensor([2])]; tensor var_18845 = transpose(perm = var_18844, x = var_18831_cast_fp16)[name = string("transpose_3")]; tensor input_979 = expand_dims(axes = input_979_axes_0, x = var_18845)[name = string("input_979")]; string gate_137_pad_type_0 = const()[name = string("gate_137_pad_type_0"), val = string("valid")]; tensor gate_137_strides_0 = const()[name = string("gate_137_strides_0"), val = tensor([1, 1])]; tensor gate_137_pad_0 = const()[name = string("gate_137_pad_0"), val = tensor([0, 0, 0, 0])]; tensor gate_137_dilations_0 = const()[name = string("gate_137_dilations_0"), val = tensor([1, 1])]; int32 gate_137_groups_0 = const()[name = string("gate_137_groups_0"), val = int32(1)]; tensor gate_137 = conv(dilations = gate_137_dilations_0, groups = gate_137_groups_0, pad = gate_137_pad_0, pad_type = gate_137_pad_type_0, strides = gate_137_strides_0, weight = layers_34_mlp_gate_proj_weight_palettized, x = input_979)[name = string("gate_137")]; string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")]; tensor up_strides_0 = const()[name = string("up_strides_0"), val = tensor([1, 1])]; tensor up_pad_0 = const()[name = string("up_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_dilations_0 = const()[name = string("up_dilations_0"), val = tensor([1, 1])]; int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)]; tensor up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = layers_34_mlp_up_proj_weight_palettized, x = input_979)[name = string("up")]; string gate_mode_0 = const()[name = string("gate_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gate = gelu(mode = gate_mode_0, x = gate_137)[name = string("gate")]; tensor input_981 = mul(x = gate, y = up)[name = string("input_981")]; string mlp_out_pad_type_0 = const()[name = string("mlp_out_pad_type_0"), val = string("valid")]; tensor mlp_out_strides_0 = const()[name = string("mlp_out_strides_0"), val = tensor([1, 1])]; tensor mlp_out_pad_0 = const()[name = string("mlp_out_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mlp_out_dilations_0 = const()[name = string("mlp_out_dilations_0"), val = tensor([1, 1])]; int32 mlp_out_groups_0 = const()[name = string("mlp_out_groups_0"), val = int32(1)]; tensor mlp_out = conv(dilations = mlp_out_dilations_0, groups = mlp_out_groups_0, pad = mlp_out_pad_0, pad_type = mlp_out_pad_type_0, strides = mlp_out_strides_0, weight = layers_34_mlp_down_proj_weight_palettized, x = input_981)[name = string("mlp_out")]; tensor var_18885_axes_0 = const()[name = string("op_18885_axes_0"), val = tensor([2])]; tensor var_18885 = squeeze(axes = var_18885_axes_0, x = mlp_out)[name = string("op_18885")]; tensor var_18889 = const()[name = string("op_18889"), val = tensor([0, 2, 1])]; int32 var_18895 = const()[name = string("op_18895"), val = int32(-1)]; fp16 const_594_promoted_to_fp16 = const()[name = string("const_594_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor x_1061 = transpose(perm = var_18889, x = var_18885)[name = string("transpose_2")]; tensor var_18901_cast_fp16 = mul(x = x_1061, y = const_594_promoted_to_fp16)[name = string("op_18901_cast_fp16")]; bool input_983_interleave_0 = const()[name = string("input_983_interleave_0"), val = bool(false)]; tensor input_983_cast_fp16 = concat(axis = var_18895, interleave = input_983_interleave_0, values = (x_1061, var_18901_cast_fp16))[name = string("input_983_cast_fp16")]; tensor normed_1033_axes_0 = const()[name = string("normed_1033_axes_0"), val = tensor([-1])]; fp16 var_18893_to_fp16 = const()[name = string("op_18893_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_1033_cast_fp16 = layer_norm(axes = normed_1033_axes_0, epsilon = var_18893_to_fp16, x = input_983_cast_fp16)[name = string("normed_1033_cast_fp16")]; tensor var_18906_split_sizes_0 = const()[name = string("op_18906_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_18906_axis_0 = const()[name = string("op_18906_axis_0"), val = int32(-1)]; tensor var_18906_cast_fp16_0, tensor var_18906_cast_fp16_1 = split(axis = var_18906_axis_0, split_sizes = var_18906_split_sizes_0, x = normed_1033_cast_fp16)[name = string("op_18906_cast_fp16")]; tensor const_595_to_fp16 = const()[name = string("const_595_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2317024640)))]; tensor var_18909_cast_fp16 = mul(x = var_18906_cast_fp16_0, y = const_595_to_fp16)[name = string("op_18909_cast_fp16")]; tensor hidden_states_421_cast_fp16 = add(x = x_1057_cast_fp16, y = var_18909_cast_fp16)[name = string("hidden_states_421_cast_fp16")]; tensor per_layer_slice_begin_0 = const()[name = string("per_layer_slice_begin_0"), val = tensor([0, 0, 8704])]; tensor per_layer_slice_end_0 = const()[name = string("per_layer_slice_end_0"), val = tensor([1, 1, 1])]; tensor per_layer_slice_end_mask_0 = const()[name = string("per_layer_slice_end_mask_0"), val = tensor([true, true, true])]; tensor per_layer_slice = slice_by_index(begin = per_layer_slice_begin_0, end = per_layer_slice_end_0, end_mask = per_layer_slice_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice")]; tensor gated_137 = linear(bias = linear_1_bias_0, weight = layers_34_per_layer_input_gate_weight_palettized, x = hidden_states_421_cast_fp16)[name = string("linear_69")]; string gated_mode_0 = const()[name = string("gated_mode_0"), val = string("TANH_APPROXIMATION")]; tensor gated = gelu(mode = gated_mode_0, x = gated_137)[name = string("gated")]; tensor input_987 = mul(x = gated, y = per_layer_slice)[name = string("input_987")]; tensor x_1065 = linear(bias = linear_2_bias_0, weight = layers_34_per_layer_projection_weight_palettized, x = input_987)[name = string("linear_70")]; int32 var_18946 = const()[name = string("op_18946"), val = int32(-1)]; fp16 const_596_promoted_to_fp16 = const()[name = string("const_596_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_18952_cast_fp16 = mul(x = x_1065, y = const_596_promoted_to_fp16)[name = string("op_18952_cast_fp16")]; bool input_989_interleave_0 = const()[name = string("input_989_interleave_0"), val = bool(false)]; tensor input_989_cast_fp16 = concat(axis = var_18946, interleave = input_989_interleave_0, values = (x_1065, var_18952_cast_fp16))[name = string("input_989_cast_fp16")]; tensor normed_1037_axes_0 = const()[name = string("normed_1037_axes_0"), val = tensor([-1])]; fp16 var_18944_to_fp16 = const()[name = string("op_18944_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_1037_cast_fp16 = layer_norm(axes = normed_1037_axes_0, epsilon = var_18944_to_fp16, x = input_989_cast_fp16)[name = string("normed_1037_cast_fp16")]; tensor var_18957_split_sizes_0 = const()[name = string("op_18957_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_18957_axis_0 = const()[name = string("op_18957_axis_0"), val = int32(-1)]; tensor var_18957_cast_fp16_0, tensor var_18957_cast_fp16_1 = split(axis = var_18957_axis_0, split_sizes = var_18957_split_sizes_0, x = normed_1037_cast_fp16)[name = string("op_18957_cast_fp16")]; tensor const_597_to_fp16 = const()[name = string("const_597_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2317027776)))]; tensor var_18960_cast_fp16 = mul(x = var_18957_cast_fp16_0, y = const_597_to_fp16)[name = string("op_18960_cast_fp16")]; tensor hidden_states_425_cast_fp16 = add(x = hidden_states_421_cast_fp16, y = var_18960_cast_fp16)[name = string("hidden_states_425_cast_fp16")]; tensor layers_34_layer_scalar_to_fp16 = const()[name = string("layers_34_layer_scalar_to_fp16"), val = tensor([0x1.56p-3])]; tensor x_1069_cast_fp16 = mul(x = hidden_states_425_cast_fp16, y = layers_34_layer_scalar_to_fp16)[name = string("x_1069_cast_fp16")]; int32 var_18968 = const()[name = string("op_18968"), val = int32(-1)]; fp16 const_598_promoted_to_fp16 = const()[name = string("const_598_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_18974_cast_fp16 = mul(x = x_1069_cast_fp16, y = const_598_promoted_to_fp16)[name = string("op_18974_cast_fp16")]; bool input_991_interleave_0 = const()[name = string("input_991_interleave_0"), val = bool(false)]; tensor input_991_cast_fp16 = concat(axis = var_18968, interleave = input_991_interleave_0, values = (x_1069_cast_fp16, var_18974_cast_fp16))[name = string("input_991_cast_fp16")]; tensor normed_1041_axes_0 = const()[name = string("normed_1041_axes_0"), val = tensor([-1])]; fp16 var_18966_to_fp16 = const()[name = string("op_18966_to_fp16"), val = fp16(0x1.1p-20)]; tensor normed_1041_cast_fp16 = layer_norm(axes = normed_1041_axes_0, epsilon = var_18966_to_fp16, x = input_991_cast_fp16)[name = string("normed_1041_cast_fp16")]; tensor var_18979_split_sizes_0 = const()[name = string("op_18979_split_sizes_0"), val = tensor([1536, 1536])]; int32 var_18979_axis_0 = const()[name = string("op_18979_axis_0"), val = int32(-1)]; tensor var_18979_cast_fp16_0, tensor var_18979_cast_fp16_1 = split(axis = var_18979_axis_0, split_sizes = var_18979_split_sizes_0, x = normed_1041_cast_fp16)[name = string("op_18979_cast_fp16")]; tensor const_599_to_fp16 = const()[name = string("const_599_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2317030912)))]; tensor var_18982_cast_fp16 = mul(x = var_18979_cast_fp16_0, y = const_599_to_fp16)[name = string("op_18982_cast_fp16")]; tensor var_18992 = const()[name = string("op_18992"), val = tensor([0, 2, 1])]; tensor squeeze_35_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2317034048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2518360704))))[name = string("squeeze_35_palettized")]; string var_19008_pad_type_0 = const()[name = string("op_19008_pad_type_0"), val = string("valid")]; int32 var_19008_groups_0 = const()[name = string("op_19008_groups_0"), val = int32(1)]; tensor var_19008_strides_0 = const()[name = string("op_19008_strides_0"), val = tensor([1])]; tensor var_19008_pad_0 = const()[name = string("op_19008_pad_0"), val = tensor([0, 0])]; tensor var_19008_dilations_0 = const()[name = string("op_19008_dilations_0"), val = tensor([1])]; tensor var_18993 = transpose(perm = var_18992, x = var_18982_cast_fp16)[name = string("transpose_1")]; tensor var_19008 = conv(dilations = var_19008_dilations_0, groups = var_19008_groups_0, pad = var_19008_pad_0, pad_type = var_19008_pad_type_0, strides = var_19008_strides_0, weight = squeeze_35_palettized, x = var_18993)[name = string("op_19008")]; tensor var_19012 = const()[name = string("op_19012"), val = tensor([0, 2, 1])]; fp16 _inversed_19015_y_0_to_fp16 = const()[name = string("_inversed_19015_y_0_to_fp16"), val = fp16(0x1.11p-5)]; tensor logits_1 = transpose(perm = var_19012, x = var_19008)[name = string("transpose_0")]; tensor _inversed_19015_cast_fp16 = mul(x = logits_1, y = _inversed_19015_y_0_to_fp16)[name = string("_inversed_19015_cast_fp16")]; tensor var_19016_cast_fp16 = tanh(x = _inversed_19015_cast_fp16)[name = string("op_19016_cast_fp16")]; fp16 var_19017_to_fp16 = const()[name = string("op_19017_to_fp16"), val = fp16(0x1.ep+4)]; tensor logits_3_cast_fp16 = mul(x = var_19016_cast_fp16, y = var_19017_to_fp16)[name = string("logits_3_cast_fp16")]; tensor logits_axes_0 = const()[name = string("logits_axes_0"), val = tensor([0])]; tensor logits_cast_fp16 = squeeze(axes = logits_axes_0, x = logits_3_cast_fp16)[name = string("logits_cast_fp16")]; int32 var_19022 = const()[name = string("op_19022"), val = int32(-1)]; int32 token_id_axis_0 = const()[name = string("token_id_axis_0"), val = int32(-1)]; bool token_id_keep_dims_0 = const()[name = string("token_id_keep_dims_0"), val = bool(false)]; string token_id_output_dtype_0 = const()[name = string("token_id_output_dtype_0"), val = string("int32")]; tensor token_id = reduce_argmax(axis = token_id_axis_0, keep_dims = token_id_keep_dims_0, output_dtype = token_id_output_dtype_0, x = logits_cast_fp16)[name = string("token_id_cast_fp16")]; tensor var_19024_axes_0 = const()[name = string("op_19024_axes_0"), val = tensor([-1])]; tensor var_19024 = expand_dims(axes = var_19024_axes_0, x = token_id)[name = string("op_19024")]; bool var_19025_validate_indices_0 = const()[name = string("op_19025_validate_indices_0"), val = bool(false)]; tensor var_19025_cast_fp16 = gather_along_axis(axis = var_19022, indices = var_19024, validate_indices = var_19025_validate_indices_0, x = logits_cast_fp16)[name = string("op_19025_cast_fp16")]; tensor var_19026_axes_0 = const()[name = string("op_19026_axes_0"), val = tensor([-1])]; tensor token_logit = squeeze(axes = var_19026_axes_0, x = var_19025_cast_fp16)[name = string("op_19026_cast_fp16")]; } -> (token_id, token_logit); }