File size: 155,142 Bytes

3b03423

program(1.3)
[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})]
{
    func main<ios18>(tensor<int32, [1]> cache_length, tensor<fp16, [1, 1024, 1, 1]> input_embeds, tensor<fp16, [1, 5120, 1, 16]> key_cache, tensor<fp16, [1, 16]> key_padding_mask, tensor<fp16, [1, 16]> kv_cache_update_mask, tensor<fp16, [1, 5120, 1, 16]> value_cache) {
            string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("fp32")];
            string cast_1_dtype_0 = const()[name = string("cast_1_dtype_0"), val = string("fp32")];
            string cast_2_dtype_0 = const()[name = string("cast_2_dtype_0"), val = string("fp32")];
            string cast_3_dtype_0 = const()[name = string("cast_3_dtype_0"), val = string("fp32")];
            string cast_4_dtype_0 = const()[name = string("cast_4_dtype_0"), val = string("fp32")];
            tensor<fp32, [2048, 1024]> layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2097280))))[name = string("layers_0_self_attn_q_proj_weight_palettized")];
            tensor<fp32, [1024, 1024]> layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2098368))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3147008))))[name = string("layers_0_self_attn_k_proj_weight_palettized")];
            tensor<fp32, [1024, 1024]> layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3148096))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4196736))))[name = string("layers_0_self_attn_v_proj_weight_palettized")];
            tensor<fp32, [1024, 2048]> layers_0_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4197824))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6295040))))[name = string("layers_0_self_attn_o_proj_weight_palettized")];
            tensor<fp32, [3072, 1024]> layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6296128))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9441920))))[name = string("layers_0_mlp_gate_proj_weight_palettized")];
            tensor<fp32, [3072, 1024]> layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9443008))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12588800))))[name = string("layers_0_mlp_up_proj_weight_palettized")];
            tensor<fp32, [1024, 3072]> layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12589888))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15735680))))[name = string("layers_0_mlp_down_proj_weight_palettized")];
            tensor<fp32, [2048, 1024]> layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15736768))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17833984))))[name = string("layers_1_self_attn_q_proj_weight_palettized")];
            tensor<fp32, [1024, 1024]> layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17835072))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18883712))))[name = string("layers_1_self_attn_k_proj_weight_palettized")];
            tensor<fp32, [1024, 1024]> layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18884800))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19933440))))[name = string("layers_1_self_attn_v_proj_weight_palettized")];
            tensor<fp32, [1024, 2048]> layers_1_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19934528))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22031744))))[name = string("layers_1_self_attn_o_proj_weight_palettized")];
            tensor<fp32, [3072, 1024]> layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22032832))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25178624))))[name = string("layers_1_mlp_gate_proj_weight_palettized")];
            tensor<fp32, [3072, 1024]> layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25179712))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28325504))))[name = string("layers_1_mlp_up_proj_weight_palettized")];
            tensor<fp32, [1024, 3072]> layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28326592))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31472384))))[name = string("layers_1_mlp_down_proj_weight_palettized")];
            tensor<fp32, [2048, 1024]> layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31473472))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33570688))))[name = string("layers_2_self_attn_q_proj_weight_palettized")];
            tensor<fp32, [1024, 1024]> layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33571776))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34620416))))[name = string("layers_2_self_attn_k_proj_weight_palettized")];
            tensor<fp32, [1024, 1024]> layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34621504))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35670144))))[name = string("layers_2_self_attn_v_proj_weight_palettized")];
            tensor<fp32, [1024, 2048]> layers_2_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35671232))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37768448))))[name = string("layers_2_self_attn_o_proj_weight_palettized")];
            tensor<fp32, [3072, 1024]> layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37769536))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40915328))))[name = string("layers_2_mlp_gate_proj_weight_palettized")];
            tensor<fp32, [3072, 1024]> layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40916416))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44062208))))[name = string("layers_2_mlp_up_proj_weight_palettized")];
            tensor<fp32, [1024, 3072]> layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44063296))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47209088))))[name = string("layers_2_mlp_down_proj_weight_palettized")];
            tensor<fp32, [2048, 1024]> layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47210176))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49307392))))[name = string("layers_3_self_attn_q_proj_weight_palettized")];
            tensor<fp32, [1024, 1024]> layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49308480))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50357120))))[name = string("layers_3_self_attn_k_proj_weight_palettized")];
            tensor<fp32, [1024, 1024]> layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50358208))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51406848))))[name = string("layers_3_self_attn_v_proj_weight_palettized")];
            tensor<fp32, [1024, 2048]> layers_3_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51407936))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53505152))))[name = string("layers_3_self_attn_o_proj_weight_palettized")];
            tensor<fp32, [3072, 1024]> layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53506240))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56652032))))[name = string("layers_3_mlp_gate_proj_weight_palettized")];
            tensor<fp32, [3072, 1024]> layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56653120))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59798912))))[name = string("layers_3_mlp_up_proj_weight_palettized")];
            tensor<fp32, [1024, 3072]> layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59800000))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62945792))))[name = string("layers_3_mlp_down_proj_weight_palettized")];
            tensor<fp32, [2048, 1024]> layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62946880))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65044096))))[name = string("layers_4_self_attn_q_proj_weight_palettized")];
            tensor<fp32, [1024, 1024]> layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65045184))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66093824))))[name = string("layers_4_self_attn_k_proj_weight_palettized")];
            tensor<fp32, [1024, 1024]> layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66094912))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67143552))))[name = string("layers_4_self_attn_v_proj_weight_palettized")];
            tensor<fp32, [1024, 2048]> layers_4_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67144640))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69241856))))[name = string("layers_4_self_attn_o_proj_weight_palettized")];
            tensor<fp32, [3072, 1024]> layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69242944))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72388736))))[name = string("layers_4_mlp_gate_proj_weight_palettized")];
            tensor<fp32, [3072, 1024]> layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72389824))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75535616))))[name = string("layers_4_mlp_up_proj_weight_palettized")];
            tensor<fp32, [1024, 3072]> layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75536704))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78682496))))[name = string("layers_4_mlp_down_proj_weight_palettized")];
            tensor<fp32, [2048, 1024]> lm_heads_0_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78683584))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80780800))))[name = string("lm_heads_0_weight_palettized")];
            tensor<fp32, [2048, 1024]> lm_heads_1_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80781888))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82879104))))[name = string("lm_heads_1_weight_palettized")];
            tensor<fp32, [2048, 1024]> lm_heads_2_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82880192))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84977408))))[name = string("lm_heads_2_weight_palettized")];
            tensor<fp32, [2048, 1024]> lm_heads_3_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84978496))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87075712))))[name = string("lm_heads_3_weight_palettized")];
            tensor<fp32, [2048, 1024]> lm_heads_4_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87076800))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89174016))))[name = string("lm_heads_4_weight_palettized")];
            tensor<fp32, [2048, 1024]> lm_heads_5_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89175104))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91272320))))[name = string("lm_heads_5_weight_palettized")];
            tensor<fp32, [2048, 1024]> lm_heads_6_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91273408))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93370624))))[name = string("lm_heads_6_weight_palettized")];
            tensor<fp32, [2048, 1024]> lm_heads_7_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93371712))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95468928))))[name = string("lm_heads_7_weight_palettized")];
            tensor<fp32, [2048, 1024]> lm_heads_8_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95470016))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97567232))))[name = string("lm_heads_8_weight_palettized")];
            tensor<fp32, [2048, 1024]> lm_heads_9_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97568320))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99665536))))[name = string("lm_heads_9_weight_palettized")];
            tensor<fp32, [2048, 1024]> lm_heads_10_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99666624))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101763840))))[name = string("lm_heads_10_weight_palettized")];
            tensor<fp32, [2048, 1024]> lm_heads_11_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101764928))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103862144))))[name = string("lm_heads_11_weight_palettized")];
            tensor<fp32, [2048, 1024]> lm_heads_12_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103863232))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105960448))))[name = string("lm_heads_12_weight_palettized")];
            tensor<fp32, [2048, 1024]> lm_heads_13_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105961536))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108058752))))[name = string("lm_heads_13_weight_palettized")];
            tensor<fp32, [2048, 1024]> lm_heads_14_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108059840))), lut = tensor<fp32, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110157056))))[name = string("lm_heads_14_weight_palettized")];
            tensor<int32, [1]> var_205_axes_0 = const()[name = string("op_205_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1, 1]> var_205 = expand_dims(axes = var_205_axes_0, x = cache_length)[name = string("op_205")];
            string position_ids_dtype_0 = const()[name = string("position_ids_dtype_0"), val = string("fp32")];
            tensor<fp32, [1, 64, 1]> const_0 = const()[name = string("const_0"), val = tensor<fp32, [1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110158144)))];
            tensor<int32, [1]> var_226_axes_0 = const()[name = string("op_226_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1]> position_ids = cast(dtype = position_ids_dtype_0, x = var_205)[name = string("cast_9")];
            tensor<fp32, [1, 1, 1]> var_226 = expand_dims(axes = var_226_axes_0, x = position_ids)[name = string("op_226")];
            bool var_227_transpose_x_0 = const()[name = string("op_227_transpose_x_0"), val = bool(false)];
            bool var_227_transpose_y_0 = const()[name = string("op_227_transpose_y_0"), val = bool(false)];
            tensor<fp32, [1, 64, 1]> var_227 = matmul(transpose_x = var_227_transpose_x_0, transpose_y = var_227_transpose_y_0, x = const_0, y = var_226)[name = string("op_227")];
            tensor<int32, [3]> freqs_perm_0 = const()[name = string("freqs_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
            int32 var_232 = const()[name = string("op_232"), val = int32(-1)];
            bool emb_interleave_0 = const()[name = string("emb_interleave_0"), val = bool(false)];
            tensor<fp32, [1, 1, 64]> freqs = transpose(perm = freqs_perm_0, x = var_227)[name = string("transpose_20")];
            tensor<fp32, [1, 1, 128]> emb = concat(axis = var_232, interleave = emb_interleave_0, values = (freqs, freqs))[name = string("emb")];
            tensor<fp32, [1, 1, 128]> var_234 = cos(x = emb)[name = string("op_234")];
            tensor<fp32, [1, 1, 128]> var_242 = sin(x = emb)[name = string("op_242")];
            tensor<int32, [4]> var_259_begin_0 = const()[name = string("op_259_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_259_end_0 = const()[name = string("op_259_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 16])];
            tensor<bool, [4]> var_259_end_mask_0 = const()[name = string("op_259_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp32, [1, 5120, 1, 16]> cast_1 = cast(dtype = cast_1_dtype_0, x = key_cache)[name = string("cast_8")];
            tensor<fp32, [1, 1024, 1, 16]> var_259 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = cast_1)[name = string("op_259")];
            tensor<int32, [4]> var_279_begin_0 = const()[name = string("op_279_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_279_end_0 = const()[name = string("op_279_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 16])];
            tensor<bool, [4]> var_279_end_mask_0 = const()[name = string("op_279_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp32, [1, 5120, 1, 16]> cast_4 = cast(dtype = cast_4_dtype_0, x = value_cache)[name = string("cast_7")];
            tensor<fp32, [1, 1024, 1, 16]> var_279 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = cast_4)[name = string("op_279")];
            tensor<int32, [1]> var_291_axes_0 = const()[name = string("op_291_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1, 1]> cast_0 = cast(dtype = cast_0_dtype_0, x = input_embeds)[name = string("cast_6")];
            tensor<fp32, [1, 1024, 1]> var_291 = squeeze(axes = var_291_axes_0, x = cast_0)[name = string("op_291")];
            tensor<int32, [1]> var_293_axes_0 = const()[name = string("op_293_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_293 = squeeze(axes = var_293_axes_0, x = var_291)[name = string("op_293")];
            tensor<int32, [1]> hidden_states_1_axes_0 = const()[name = string("hidden_states_1_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_1 = expand_dims(axes = hidden_states_1_axes_0, x = var_293)[name = string("hidden_states_1")];
            fp32 var_299_promoted = const()[name = string("op_299_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_305 = pow(x = hidden_states_1, y = var_299_promoted)[name = string("op_305")];
            tensor<int32, [1]> variance_1_axes_0 = const()[name = string("variance_1_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_1_keep_dims_0 = const()[name = string("variance_1_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 1]> variance_1 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_305)[name = string("variance_1")];
            fp32 var_308 = const()[name = string("op_308"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_309 = add(x = variance_1, y = var_308)[name = string("op_309")];
            fp32 var_310_epsilon_0 = const()[name = string("op_310_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_310 = rsqrt(epsilon = var_310_epsilon_0, x = var_309)[name = string("op_310")];
            tensor<fp32, [1, 1, 1024]> hidden_states_5 = mul(x = hidden_states_1, y = var_310)[name = string("hidden_states_5")];
            tensor<fp32, [1024]> const_1 = const()[name = string("const_1"), val = tensor<fp32, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110158464)))];
            tensor<fp32, [1, 1, 1024]> input_1 = mul(x = const_1, y = hidden_states_5)[name = string("input_1")];
            tensor<fp32, [2048]> linear_0_bias_0 = const()[name = string("linear_0_bias_0"), val = tensor<fp32, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110162624)))];
            tensor<fp32, [1, 1, 2048]> var_316 = linear(bias = linear_0_bias_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = input_1)[name = string("linear_0")];
            tensor<int32, [4]> var_321 = const()[name = string("op_321"), val = tensor<int32, [4]>([1, 1, 16, 128])];
            tensor<fp32, [1, 1, 16, 128]> hidden_states_7 = reshape(shape = var_321, x = var_316)[name = string("hidden_states_7")];
            tensor<fp32, [1024]> linear_1_bias_0 = const()[name = string("linear_1_bias_0"), val = tensor<fp32, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110170880)))];
            tensor<fp32, [1, 1, 1024]> var_325 = linear(bias = linear_1_bias_0, weight = layers_0_self_attn_k_proj_weight_palettized, x = input_1)[name = string("linear_1")];
            tensor<int32, [4]> var_330 = const()[name = string("op_330"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<fp32, [1, 1, 8, 128]> hidden_states_13 = reshape(shape = var_330, x = var_325)[name = string("hidden_states_13")];
            tensor<fp32, [1, 1, 1024]> var_334 = linear(bias = linear_1_bias_0, weight = layers_0_self_attn_v_proj_weight_palettized, x = input_1)[name = string("linear_2")];
            tensor<int32, [4]> var_339 = const()[name = string("op_339"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<fp32, [1, 1, 8, 128]> v_1 = reshape(shape = var_339, x = var_334)[name = string("v_1")];
            fp32 var_344_promoted = const()[name = string("op_344_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 16, 128]> var_350 = pow(x = hidden_states_7, y = var_344_promoted)[name = string("op_350")];
            tensor<int32, [1]> variance_3_axes_0 = const()[name = string("variance_3_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_3_keep_dims_0 = const()[name = string("variance_3_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 16, 1]> variance_3 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_350)[name = string("variance_3")];
            fp32 var_353 = const()[name = string("op_353"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 16, 1]> var_354 = add(x = variance_3, y = var_353)[name = string("op_354")];
            fp32 var_355_epsilon_0 = const()[name = string("op_355_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 16, 1]> var_355 = rsqrt(epsilon = var_355_epsilon_0, x = var_354)[name = string("op_355")];
            tensor<fp32, [1, 1, 16, 128]> hidden_states_11 = mul(x = hidden_states_7, y = var_355)[name = string("hidden_states_11")];
            tensor<fp32, [128]> const_2 = const()[name = string("const_2"), val = tensor<fp32, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110175040)))];
            tensor<fp32, [1, 1, 16, 128]> q_1 = mul(x = const_2, y = hidden_states_11)[name = string("q_1")];
            fp32 var_362_promoted = const()[name = string("op_362_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 8, 128]> var_368 = pow(x = hidden_states_13, y = var_362_promoted)[name = string("op_368")];
            tensor<int32, [1]> variance_5_axes_0 = const()[name = string("variance_5_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_5_keep_dims_0 = const()[name = string("variance_5_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 8, 1]> variance_5 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_368)[name = string("variance_5")];
            fp32 var_371 = const()[name = string("op_371"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 8, 1]> var_372 = add(x = variance_5, y = var_371)[name = string("op_372")];
            fp32 var_373_epsilon_0 = const()[name = string("op_373_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 8, 1]> var_373 = rsqrt(epsilon = var_373_epsilon_0, x = var_372)[name = string("op_373")];
            tensor<fp32, [1, 1, 8, 128]> hidden_states_17 = mul(x = hidden_states_13, y = var_373)[name = string("hidden_states_17")];
            tensor<fp32, [128]> const_3 = const()[name = string("const_3"), val = tensor<fp32, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110175616)))];
            tensor<fp32, [1, 1, 8, 128]> k_1 = mul(x = const_3, y = hidden_states_17)[name = string("k_1")];
            tensor<int32, [4]> q_3_perm_0 = const()[name = string("q_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [4]> k_3_perm_0 = const()[name = string("k_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [4]> v_3_perm_0 = const()[name = string("v_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [1]> cos_r_1_axes_0 = const()[name = string("cos_r_1_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 1, 1, 128]> cos_r_1 = expand_dims(axes = cos_r_1_axes_0, x = var_234)[name = string("cos_r_1")];
            tensor<int32, [1]> sin_r_1_axes_0 = const()[name = string("sin_r_1_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 1, 1, 128]> sin_r_1 = expand_dims(axes = sin_r_1_axes_0, x = var_242)[name = string("sin_r_1")];
            tensor<fp32, [1, 16, 1, 128]> q_3 = transpose(perm = q_3_perm_0, x = q_1)[name = string("transpose_19")];
            tensor<fp32, [1, 16, 1, 128]> var_390 = mul(x = q_3, y = cos_r_1)[name = string("op_390")];
            tensor<int32, [4]> x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor<int32, [4]>([1, 16, 1, 64])];
            tensor<bool, [4]> x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 16, 1, 64]> x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_3)[name = string("x1_1")];
            tensor<int32, [4]> x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor<int32, [4]>([1, 16, 1, 128])];
            tensor<bool, [4]> x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 16, 1, 64]> x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_3)[name = string("x2_1")];
            fp32 const_6_promoted = const()[name = string("const_6_promoted"), val = fp32(-0x1p+0)];
            tensor<fp32, [1, 16, 1, 64]> var_411 = mul(x = x2_1, y = const_6_promoted)[name = string("op_411")];
            int32 var_413 = const()[name = string("op_413"), val = int32(-1)];
            bool var_414_interleave_0 = const()[name = string("op_414_interleave_0"), val = bool(false)];
            tensor<fp32, [1, 16, 1, 128]> var_414 = concat(axis = var_413, interleave = var_414_interleave_0, values = (var_411, x1_1))[name = string("op_414")];
            tensor<fp32, [1, 16, 1, 128]> var_415 = mul(x = var_414, y = sin_r_1)[name = string("op_415")];
            tensor<fp32, [1, 16, 1, 128]> q_5 = add(x = var_390, y = var_415)[name = string("q_5")];
            tensor<fp32, [1, 8, 1, 128]> k_3 = transpose(perm = k_3_perm_0, x = k_1)[name = string("transpose_18")];
            tensor<fp32, [1, 8, 1, 128]> var_418 = mul(x = k_3, y = cos_r_1)[name = string("op_418")];
            tensor<int32, [4]> x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
            tensor<bool, [4]> x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 8, 1, 64]> x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_3)[name = string("x1_3")];
            tensor<int32, [4]> x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<bool, [4]> x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 8, 1, 64]> x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_3)[name = string("x2_3")];
            fp32 const_9_promoted = const()[name = string("const_9_promoted"), val = fp32(-0x1p+0)];
            tensor<fp32, [1, 8, 1, 64]> var_439 = mul(x = x2_3, y = const_9_promoted)[name = string("op_439")];
            int32 var_441 = const()[name = string("op_441"), val = int32(-1)];
            bool var_442_interleave_0 = const()[name = string("op_442_interleave_0"), val = bool(false)];
            tensor<fp32, [1, 8, 1, 128]> var_442 = concat(axis = var_441, interleave = var_442_interleave_0, values = (var_439, x1_3))[name = string("op_442")];
            tensor<fp32, [1, 8, 1, 128]> var_443 = mul(x = var_442, y = sin_r_1)[name = string("op_443")];
            tensor<fp32, [1, 8, 1, 128]> k_5 = add(x = var_418, y = var_443)[name = string("k_5")];
            tensor<int32, [4]> var_450 = const()[name = string("op_450"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 1024, 1, 1]> nk_flat_1 = reshape(shape = var_450, x = k_5)[name = string("nk_flat_1")];
            tensor<int32, [4]> var_456 = const()[name = string("op_456"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 8, 1, 128]> v_3 = transpose(perm = v_3_perm_0, x = v_1)[name = string("transpose_17")];
            tensor<fp32, [1, 1024, 1, 1]> nv_flat_1 = reshape(shape = var_456, x = v_3)[name = string("nv_flat_1")];
            tensor<int32, [1]> var_459_axes_0 = const()[name = string("op_459_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 16]> cast_3 = cast(dtype = cast_3_dtype_0, x = kv_cache_update_mask)[name = string("cast_5")];
            tensor<fp32, [1, 1, 16]> var_459 = expand_dims(axes = var_459_axes_0, x = cast_3)[name = string("op_459")];
            tensor<int32, [1]> update_mask_1_axes_0 = const()[name = string("update_mask_1_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1, 1, 16]> update_mask_1 = expand_dims(axes = update_mask_1_axes_0, x = var_459)[name = string("update_mask_1")];
            fp32 var_462 = const()[name = string("op_462"), val = fp32(0x1p+0)];
            tensor<fp32, [1, 1, 1, 16]> var_464 = sub(x = var_462, y = update_mask_1)[name = string("op_464")];
            tensor<fp32, [1, 1024, 1, 16]> var_465 = mul(x = var_259, y = var_464)[name = string("op_465")];
            tensor<fp32, [1, 1024, 1, 16]> var_466 = mul(x = nk_flat_1, y = update_mask_1)[name = string("op_466")];
            tensor<fp32, [1, 1024, 1, 16]> key_cache_5 = add(x = var_465, y = var_466)[name = string("key_cache_5")];
            tensor<fp32, [1, 1024, 1, 16]> var_472 = mul(x = var_279, y = var_464)[name = string("op_472")];
            tensor<fp32, [1, 1024, 1, 16]> var_473 = mul(x = nv_flat_1, y = update_mask_1)[name = string("op_473")];
            tensor<fp32, [1, 1024, 1, 16]> value_cache_5 = add(x = var_472, y = var_473)[name = string("value_cache_5")];
            tensor<int32, [1]> var_477_axes_0 = const()[name = string("op_477_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_477 = squeeze(axes = var_477_axes_0, x = key_cache_5)[name = string("op_477")];
            tensor<int32, [4]> var_482 = const()[name = string("op_482"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> kc_1 = reshape(shape = var_482, x = var_477)[name = string("kc_1")];
            tensor<int32, [1]> var_485_axes_0 = const()[name = string("op_485_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_485 = squeeze(axes = var_485_axes_0, x = value_cache_5)[name = string("op_485")];
            tensor<int32, [4]> var_490 = const()[name = string("op_490"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> vc_1 = reshape(shape = var_490, x = var_485)[name = string("vc_1")];
            tensor<int32, [1]> var_493_axes_0 = const()[name = string("op_493_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_493 = expand_dims(axes = var_493_axes_0, x = kc_1)[name = string("op_493")];
            tensor<int32, [5]> var_501_reps_0 = const()[name = string("op_501_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_501 = tile(reps = var_501_reps_0, x = var_493)[name = string("op_501")];
            tensor<int32, [4]> var_506 = const()[name = string("op_506"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> kc_3 = reshape(shape = var_506, x = var_501)[name = string("kc_3")];
            tensor<int32, [1]> var_509_axes_0 = const()[name = string("op_509_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_509 = expand_dims(axes = var_509_axes_0, x = vc_1)[name = string("op_509")];
            tensor<int32, [5]> var_517_reps_0 = const()[name = string("op_517_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_517 = tile(reps = var_517_reps_0, x = var_509)[name = string("op_517")];
            tensor<int32, [4]> var_522 = const()[name = string("op_522"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> vc_3 = reshape(shape = var_522, x = var_517)[name = string("vc_3")];
            bool var_524_transpose_x_0 = const()[name = string("op_524_transpose_x_0"), val = bool(false)];
            bool var_524_transpose_y_0 = const()[name = string("op_524_transpose_y_0"), val = bool(false)];
            tensor<fp32, [1, 16, 1, 16]> var_524 = matmul(transpose_x = var_524_transpose_x_0, transpose_y = var_524_transpose_y_0, x = q_5, y = kc_3)[name = string("op_524")];
            fp32 _inversed_attn_weights_1_y_0 = const()[name = string("_inversed_attn_weights_1_y_0"), val = fp32(0x1.6a09e6p-4)];
            tensor<fp32, [1, 16, 1, 16]> _inversed_attn_weights_1 = mul(x = var_524, y = _inversed_attn_weights_1_y_0)[name = string("_inversed_attn_weights_1")];
            tensor<int32, [1]> var_528_axes_0 = const()[name = string("op_528_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 16]> cast_2 = cast(dtype = cast_2_dtype_0, x = key_padding_mask)[name = string("cast_4")];
            tensor<fp32, [1, 1, 16]> var_528 = expand_dims(axes = var_528_axes_0, x = cast_2)[name = string("op_528")];
            tensor<int32, [1]> mask_1_axes_0 = const()[name = string("mask_1_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1, 1, 16]> mask_1 = expand_dims(axes = mask_1_axes_0, x = var_528)[name = string("mask_1")];
            tensor<fp32, [1, 16, 1, 16]> attn_weights_3 = add(x = _inversed_attn_weights_1, y = mask_1)[name = string("attn_weights_3")];
            int32 var_538 = const()[name = string("op_538"), val = int32(-1)];
            tensor<fp32, [1, 16, 1, 16]> attn_weights_7 = softmax(axis = var_538, x = attn_weights_3)[name = string("attn_weights_7")];
            bool attn_output_1_transpose_x_1 = const()[name = string("attn_output_1_transpose_x_1"), val = bool(false)];
            bool attn_output_1_transpose_y_1 = const()[name = string("attn_output_1_transpose_y_1"), val = bool(true)];
            tensor<fp32, [1, 16, 1, 128]> attn_output_1 = matmul(transpose_x = attn_output_1_transpose_x_1, transpose_y = attn_output_1_transpose_y_1, x = attn_weights_7, y = vc_3)[name = string("attn_output_1")];
            tensor<int32, [4]> var_547_perm_0 = const()[name = string("op_547_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_551 = const()[name = string("op_551"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp32, [1, 1, 16, 128]> var_547 = transpose(perm = var_547_perm_0, x = attn_output_1)[name = string("transpose_16")];
            tensor<fp32, [1, 1, 2048]> input_3 = reshape(shape = var_551, x = var_547)[name = string("input_3")];
            tensor<fp32, [1, 1, 1024]> attn_output_3 = linear(bias = linear_1_bias_0, weight = layers_0_self_attn_o_proj_weight_palettized, x = input_3)[name = string("linear_3")];
            tensor<int32, [1]> var_557_axes_0 = const()[name = string("op_557_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_557 = squeeze(axes = var_557_axes_0, x = attn_output_3)[name = string("op_557")];
            tensor<int32, [1]> var_559_axes_0 = const()[name = string("op_559_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_559 = squeeze(axes = var_559_axes_0, x = var_557)[name = string("op_559")];
            tensor<int32, [1]> var_561_axes_0 = const()[name = string("op_561_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_561 = expand_dims(axes = var_561_axes_0, x = var_559)[name = string("op_561")];
            tensor<int32, [1]> attn_4d_1_axes_0 = const()[name = string("attn_4d_1_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> attn_4d_1 = expand_dims(axes = attn_4d_1_axes_0, x = var_561)[name = string("attn_4d_1")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_1 = add(x = cast_0, y = attn_4d_1)[name = string("hidden_1")];
            tensor<int32, [1]> var_567_axes_0 = const()[name = string("op_567_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_567 = squeeze(axes = var_567_axes_0, x = hidden_1)[name = string("op_567")];
            tensor<int32, [1]> var_569_axes_0 = const()[name = string("op_569_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_569 = squeeze(axes = var_569_axes_0, x = var_567)[name = string("op_569")];
            tensor<int32, [1]> hidden_states_19_axes_0 = const()[name = string("hidden_states_19_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_19 = expand_dims(axes = hidden_states_19_axes_0, x = var_569)[name = string("hidden_states_19")];
            fp32 var_575_promoted = const()[name = string("op_575_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_581 = pow(x = hidden_states_19, y = var_575_promoted)[name = string("op_581")];
            tensor<int32, [1]> variance_7_axes_0 = const()[name = string("variance_7_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_7_keep_dims_0 = const()[name = string("variance_7_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 1]> variance_7 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_581)[name = string("variance_7")];
            fp32 var_584 = const()[name = string("op_584"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_585 = add(x = variance_7, y = var_584)[name = string("op_585")];
            fp32 var_586_epsilon_0 = const()[name = string("op_586_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_586 = rsqrt(epsilon = var_586_epsilon_0, x = var_585)[name = string("op_586")];
            tensor<fp32, [1, 1, 1024]> hidden_states_23 = mul(x = hidden_states_19, y = var_586)[name = string("hidden_states_23")];
            tensor<fp32, [1024]> const_10 = const()[name = string("const_10"), val = tensor<fp32, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110176192)))];
            tensor<fp32, [1, 1, 1024]> input_5 = mul(x = const_10, y = hidden_states_23)[name = string("input_5")];
            tensor<fp32, [3072]> linear_4_bias_0 = const()[name = string("linear_4_bias_0"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110180352)))];
            tensor<fp32, [1, 1, 3072]> input_7 = linear(bias = linear_4_bias_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_5)[name = string("linear_4")];
            tensor<fp32, [1, 1, 3072]> var_596 = silu(x = input_7)[name = string("op_596")];
            tensor<fp32, [1, 1, 3072]> var_598 = linear(bias = linear_4_bias_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_5)[name = string("linear_5")];
            tensor<fp32, [1, 1, 3072]> input_9 = mul(x = var_596, y = var_598)[name = string("input_9")];
            tensor<fp32, [1, 1, 1024]> mlp_out_1 = linear(bias = linear_1_bias_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_9)[name = string("linear_6")];
            tensor<int32, [1]> var_603_axes_0 = const()[name = string("op_603_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_603 = squeeze(axes = var_603_axes_0, x = mlp_out_1)[name = string("op_603")];
            tensor<int32, [1]> var_605_axes_0 = const()[name = string("op_605_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_605 = squeeze(axes = var_605_axes_0, x = var_603)[name = string("op_605")];
            tensor<int32, [1]> var_607_axes_0 = const()[name = string("op_607_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_607 = expand_dims(axes = var_607_axes_0, x = var_605)[name = string("op_607")];
            tensor<int32, [1]> mlp_4d_1_axes_0 = const()[name = string("mlp_4d_1_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> mlp_4d_1 = expand_dims(axes = mlp_4d_1_axes_0, x = var_607)[name = string("mlp_4d_1")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_3 = add(x = hidden_1, y = mlp_4d_1)[name = string("hidden_3")];
            tensor<int32, [4]> var_621_begin_0 = const()[name = string("op_621_begin_0"), val = tensor<int32, [4]>([0, 1024, 0, 0])];
            tensor<int32, [4]> var_621_end_0 = const()[name = string("op_621_end_0"), val = tensor<int32, [4]>([1, 2048, 1, 16])];
            tensor<bool, [4]> var_621_end_mask_0 = const()[name = string("op_621_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp32, [1, 1024, 1, 16]> var_621 = slice_by_index(begin = var_621_begin_0, end = var_621_end_0, end_mask = var_621_end_mask_0, x = cast_1)[name = string("op_621")];
            tensor<int32, [4]> var_641_begin_0 = const()[name = string("op_641_begin_0"), val = tensor<int32, [4]>([0, 1024, 0, 0])];
            tensor<int32, [4]> var_641_end_0 = const()[name = string("op_641_end_0"), val = tensor<int32, [4]>([1, 2048, 1, 16])];
            tensor<bool, [4]> var_641_end_mask_0 = const()[name = string("op_641_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp32, [1, 1024, 1, 16]> var_641 = slice_by_index(begin = var_641_begin_0, end = var_641_end_0, end_mask = var_641_end_mask_0, x = cast_4)[name = string("op_641")];
            tensor<int32, [1]> var_653_axes_0 = const()[name = string("op_653_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_653 = squeeze(axes = var_653_axes_0, x = hidden_3)[name = string("op_653")];
            tensor<int32, [1]> var_655_axes_0 = const()[name = string("op_655_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_655 = squeeze(axes = var_655_axes_0, x = var_653)[name = string("op_655")];
            tensor<int32, [1]> hidden_states_25_axes_0 = const()[name = string("hidden_states_25_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_25 = expand_dims(axes = hidden_states_25_axes_0, x = var_655)[name = string("hidden_states_25")];
            fp32 var_661_promoted = const()[name = string("op_661_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_667 = pow(x = hidden_states_25, y = var_661_promoted)[name = string("op_667")];
            tensor<int32, [1]> variance_9_axes_0 = const()[name = string("variance_9_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_9_keep_dims_0 = const()[name = string("variance_9_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 1]> variance_9 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_667)[name = string("variance_9")];
            fp32 var_670 = const()[name = string("op_670"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_671 = add(x = variance_9, y = var_670)[name = string("op_671")];
            fp32 var_672_epsilon_0 = const()[name = string("op_672_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_672 = rsqrt(epsilon = var_672_epsilon_0, x = var_671)[name = string("op_672")];
            tensor<fp32, [1, 1, 1024]> hidden_states_29 = mul(x = hidden_states_25, y = var_672)[name = string("hidden_states_29")];
            tensor<fp32, [1024]> const_11 = const()[name = string("const_11"), val = tensor<fp32, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110192704)))];
            tensor<fp32, [1, 1, 1024]> input_11 = mul(x = const_11, y = hidden_states_29)[name = string("input_11")];
            tensor<fp32, [1, 1, 2048]> var_678 = linear(bias = linear_0_bias_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = input_11)[name = string("linear_7")];
            tensor<int32, [4]> var_683 = const()[name = string("op_683"), val = tensor<int32, [4]>([1, 1, 16, 128])];
            tensor<fp32, [1, 1, 16, 128]> hidden_states_31 = reshape(shape = var_683, x = var_678)[name = string("hidden_states_31")];
            tensor<fp32, [1, 1, 1024]> var_687 = linear(bias = linear_1_bias_0, weight = layers_1_self_attn_k_proj_weight_palettized, x = input_11)[name = string("linear_8")];
            tensor<int32, [4]> var_692 = const()[name = string("op_692"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<fp32, [1, 1, 8, 128]> hidden_states_37 = reshape(shape = var_692, x = var_687)[name = string("hidden_states_37")];
            tensor<fp32, [1, 1, 1024]> var_696 = linear(bias = linear_1_bias_0, weight = layers_1_self_attn_v_proj_weight_palettized, x = input_11)[name = string("linear_9")];
            tensor<int32, [4]> var_701 = const()[name = string("op_701"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<fp32, [1, 1, 8, 128]> v_5 = reshape(shape = var_701, x = var_696)[name = string("v_5")];
            fp32 var_706_promoted = const()[name = string("op_706_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 16, 128]> var_712 = pow(x = hidden_states_31, y = var_706_promoted)[name = string("op_712")];
            tensor<int32, [1]> variance_11_axes_0 = const()[name = string("variance_11_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_11_keep_dims_0 = const()[name = string("variance_11_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 16, 1]> variance_11 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = var_712)[name = string("variance_11")];
            fp32 var_715 = const()[name = string("op_715"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 16, 1]> var_716 = add(x = variance_11, y = var_715)[name = string("op_716")];
            fp32 var_717_epsilon_0 = const()[name = string("op_717_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 16, 1]> var_717 = rsqrt(epsilon = var_717_epsilon_0, x = var_716)[name = string("op_717")];
            tensor<fp32, [1, 1, 16, 128]> hidden_states_35 = mul(x = hidden_states_31, y = var_717)[name = string("hidden_states_35")];
            tensor<fp32, [128]> const_12 = const()[name = string("const_12"), val = tensor<fp32, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110196864)))];
            tensor<fp32, [1, 1, 16, 128]> q_7 = mul(x = const_12, y = hidden_states_35)[name = string("q_7")];
            fp32 var_724_promoted = const()[name = string("op_724_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 8, 128]> var_730 = pow(x = hidden_states_37, y = var_724_promoted)[name = string("op_730")];
            tensor<int32, [1]> variance_13_axes_0 = const()[name = string("variance_13_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_13_keep_dims_0 = const()[name = string("variance_13_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 8, 1]> variance_13 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = var_730)[name = string("variance_13")];
            fp32 var_733 = const()[name = string("op_733"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 8, 1]> var_734 = add(x = variance_13, y = var_733)[name = string("op_734")];
            fp32 var_735_epsilon_0 = const()[name = string("op_735_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 8, 1]> var_735 = rsqrt(epsilon = var_735_epsilon_0, x = var_734)[name = string("op_735")];
            tensor<fp32, [1, 1, 8, 128]> hidden_states_41 = mul(x = hidden_states_37, y = var_735)[name = string("hidden_states_41")];
            tensor<fp32, [128]> const_13 = const()[name = string("const_13"), val = tensor<fp32, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110197440)))];
            tensor<fp32, [1, 1, 8, 128]> k_7 = mul(x = const_13, y = hidden_states_41)[name = string("k_7")];
            tensor<int32, [4]> q_9_perm_0 = const()[name = string("q_9_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [4]> k_9_perm_0 = const()[name = string("k_9_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [4]> v_7_perm_0 = const()[name = string("v_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<fp32, [1, 16, 1, 128]> q_9 = transpose(perm = q_9_perm_0, x = q_7)[name = string("transpose_15")];
            tensor<fp32, [1, 16, 1, 128]> var_752 = mul(x = q_9, y = cos_r_1)[name = string("op_752")];
            tensor<int32, [4]> x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor<int32, [4]>([1, 16, 1, 64])];
            tensor<bool, [4]> x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 16, 1, 64]> x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_9)[name = string("x1_5")];
            tensor<int32, [4]> x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor<int32, [4]>([1, 16, 1, 128])];
            tensor<bool, [4]> x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 16, 1, 64]> x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_9)[name = string("x2_5")];
            fp32 const_16_promoted = const()[name = string("const_16_promoted"), val = fp32(-0x1p+0)];
            tensor<fp32, [1, 16, 1, 64]> var_773 = mul(x = x2_5, y = const_16_promoted)[name = string("op_773")];
            int32 var_775 = const()[name = string("op_775"), val = int32(-1)];
            bool var_776_interleave_0 = const()[name = string("op_776_interleave_0"), val = bool(false)];
            tensor<fp32, [1, 16, 1, 128]> var_776 = concat(axis = var_775, interleave = var_776_interleave_0, values = (var_773, x1_5))[name = string("op_776")];
            tensor<fp32, [1, 16, 1, 128]> var_777 = mul(x = var_776, y = sin_r_1)[name = string("op_777")];
            tensor<fp32, [1, 16, 1, 128]> q_11 = add(x = var_752, y = var_777)[name = string("q_11")];
            tensor<fp32, [1, 8, 1, 128]> k_9 = transpose(perm = k_9_perm_0, x = k_7)[name = string("transpose_14")];
            tensor<fp32, [1, 8, 1, 128]> var_780 = mul(x = k_9, y = cos_r_1)[name = string("op_780")];
            tensor<int32, [4]> x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
            tensor<bool, [4]> x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 8, 1, 64]> x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_9)[name = string("x1_7")];
            tensor<int32, [4]> x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<bool, [4]> x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 8, 1, 64]> x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_9)[name = string("x2_7")];
            fp32 const_19_promoted = const()[name = string("const_19_promoted"), val = fp32(-0x1p+0)];
            tensor<fp32, [1, 8, 1, 64]> var_801 = mul(x = x2_7, y = const_19_promoted)[name = string("op_801")];
            int32 var_803 = const()[name = string("op_803"), val = int32(-1)];
            bool var_804_interleave_0 = const()[name = string("op_804_interleave_0"), val = bool(false)];
            tensor<fp32, [1, 8, 1, 128]> var_804 = concat(axis = var_803, interleave = var_804_interleave_0, values = (var_801, x1_7))[name = string("op_804")];
            tensor<fp32, [1, 8, 1, 128]> var_805 = mul(x = var_804, y = sin_r_1)[name = string("op_805")];
            tensor<fp32, [1, 8, 1, 128]> k_11 = add(x = var_780, y = var_805)[name = string("k_11")];
            tensor<int32, [4]> var_812 = const()[name = string("op_812"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 1024, 1, 1]> nk_flat_3 = reshape(shape = var_812, x = k_11)[name = string("nk_flat_3")];
            tensor<int32, [4]> var_818 = const()[name = string("op_818"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 8, 1, 128]> v_7 = transpose(perm = v_7_perm_0, x = v_5)[name = string("transpose_13")];
            tensor<fp32, [1, 1024, 1, 1]> nv_flat_3 = reshape(shape = var_818, x = v_7)[name = string("nv_flat_3")];
            tensor<fp32, [1, 1024, 1, 16]> var_827 = mul(x = var_621, y = var_464)[name = string("op_827")];
            tensor<fp32, [1, 1024, 1, 16]> var_828 = mul(x = nk_flat_3, y = update_mask_1)[name = string("op_828")];
            tensor<fp32, [1, 1024, 1, 16]> key_cache_9 = add(x = var_827, y = var_828)[name = string("key_cache_9")];
            tensor<fp32, [1, 1024, 1, 16]> var_834 = mul(x = var_641, y = var_464)[name = string("op_834")];
            tensor<fp32, [1, 1024, 1, 16]> var_835 = mul(x = nv_flat_3, y = update_mask_1)[name = string("op_835")];
            tensor<fp32, [1, 1024, 1, 16]> value_cache_9 = add(x = var_834, y = var_835)[name = string("value_cache_9")];
            tensor<int32, [1]> var_839_axes_0 = const()[name = string("op_839_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_839 = squeeze(axes = var_839_axes_0, x = key_cache_9)[name = string("op_839")];
            tensor<int32, [4]> var_844 = const()[name = string("op_844"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> kc_5 = reshape(shape = var_844, x = var_839)[name = string("kc_5")];
            tensor<int32, [1]> var_847_axes_0 = const()[name = string("op_847_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_847 = squeeze(axes = var_847_axes_0, x = value_cache_9)[name = string("op_847")];
            tensor<int32, [4]> var_852 = const()[name = string("op_852"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> vc_5 = reshape(shape = var_852, x = var_847)[name = string("vc_5")];
            tensor<int32, [1]> var_855_axes_0 = const()[name = string("op_855_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_855 = expand_dims(axes = var_855_axes_0, x = kc_5)[name = string("op_855")];
            tensor<int32, [5]> var_863_reps_0 = const()[name = string("op_863_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_863 = tile(reps = var_863_reps_0, x = var_855)[name = string("op_863")];
            tensor<int32, [4]> var_868 = const()[name = string("op_868"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> kc_7 = reshape(shape = var_868, x = var_863)[name = string("kc_7")];
            tensor<int32, [1]> var_871_axes_0 = const()[name = string("op_871_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_871 = expand_dims(axes = var_871_axes_0, x = vc_5)[name = string("op_871")];
            tensor<int32, [5]> var_879_reps_0 = const()[name = string("op_879_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_879 = tile(reps = var_879_reps_0, x = var_871)[name = string("op_879")];
            tensor<int32, [4]> var_884 = const()[name = string("op_884"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> vc_7 = reshape(shape = var_884, x = var_879)[name = string("vc_7")];
            bool var_886_transpose_x_0 = const()[name = string("op_886_transpose_x_0"), val = bool(false)];
            bool var_886_transpose_y_0 = const()[name = string("op_886_transpose_y_0"), val = bool(false)];
            tensor<fp32, [1, 16, 1, 16]> var_886 = matmul(transpose_x = var_886_transpose_x_0, transpose_y = var_886_transpose_y_0, x = q_11, y = kc_7)[name = string("op_886")];
            fp32 _inversed_attn_weights_9_y_0 = const()[name = string("_inversed_attn_weights_9_y_0"), val = fp32(0x1.6a09e6p-4)];
            tensor<fp32, [1, 16, 1, 16]> _inversed_attn_weights_9 = mul(x = var_886, y = _inversed_attn_weights_9_y_0)[name = string("_inversed_attn_weights_9")];
            tensor<fp32, [1, 16, 1, 16]> attn_weights_11 = add(x = _inversed_attn_weights_9, y = mask_1)[name = string("attn_weights_11")];
            int32 var_900 = const()[name = string("op_900"), val = int32(-1)];
            tensor<fp32, [1, 16, 1, 16]> attn_weights_15 = softmax(axis = var_900, x = attn_weights_11)[name = string("attn_weights_15")];
            bool attn_output_5_transpose_x_1 = const()[name = string("attn_output_5_transpose_x_1"), val = bool(false)];
            bool attn_output_5_transpose_y_1 = const()[name = string("attn_output_5_transpose_y_1"), val = bool(true)];
            tensor<fp32, [1, 16, 1, 128]> attn_output_5 = matmul(transpose_x = attn_output_5_transpose_x_1, transpose_y = attn_output_5_transpose_y_1, x = attn_weights_15, y = vc_7)[name = string("attn_output_5")];
            tensor<int32, [4]> var_909_perm_0 = const()[name = string("op_909_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_913 = const()[name = string("op_913"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp32, [1, 1, 16, 128]> var_909 = transpose(perm = var_909_perm_0, x = attn_output_5)[name = string("transpose_12")];
            tensor<fp32, [1, 1, 2048]> input_13 = reshape(shape = var_913, x = var_909)[name = string("input_13")];
            tensor<fp32, [1, 1, 1024]> attn_output_7 = linear(bias = linear_1_bias_0, weight = layers_1_self_attn_o_proj_weight_palettized, x = input_13)[name = string("linear_10")];
            tensor<int32, [1]> var_919_axes_0 = const()[name = string("op_919_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_919 = squeeze(axes = var_919_axes_0, x = attn_output_7)[name = string("op_919")];
            tensor<int32, [1]> var_921_axes_0 = const()[name = string("op_921_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_921 = squeeze(axes = var_921_axes_0, x = var_919)[name = string("op_921")];
            tensor<int32, [1]> var_923_axes_0 = const()[name = string("op_923_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_923 = expand_dims(axes = var_923_axes_0, x = var_921)[name = string("op_923")];
            tensor<int32, [1]> attn_4d_3_axes_0 = const()[name = string("attn_4d_3_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> attn_4d_3 = expand_dims(axes = attn_4d_3_axes_0, x = var_923)[name = string("attn_4d_3")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_5 = add(x = hidden_3, y = attn_4d_3)[name = string("hidden_5")];
            tensor<int32, [1]> var_929_axes_0 = const()[name = string("op_929_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_929 = squeeze(axes = var_929_axes_0, x = hidden_5)[name = string("op_929")];
            tensor<int32, [1]> var_931_axes_0 = const()[name = string("op_931_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_931 = squeeze(axes = var_931_axes_0, x = var_929)[name = string("op_931")];
            tensor<int32, [1]> hidden_states_43_axes_0 = const()[name = string("hidden_states_43_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_43 = expand_dims(axes = hidden_states_43_axes_0, x = var_931)[name = string("hidden_states_43")];
            fp32 var_937_promoted = const()[name = string("op_937_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_943 = pow(x = hidden_states_43, y = var_937_promoted)[name = string("op_943")];
            tensor<int32, [1]> variance_15_axes_0 = const()[name = string("variance_15_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_15_keep_dims_0 = const()[name = string("variance_15_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 1]> variance_15 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_943)[name = string("variance_15")];
            fp32 var_946 = const()[name = string("op_946"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_947 = add(x = variance_15, y = var_946)[name = string("op_947")];
            fp32 var_948_epsilon_0 = const()[name = string("op_948_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_948 = rsqrt(epsilon = var_948_epsilon_0, x = var_947)[name = string("op_948")];
            tensor<fp32, [1, 1, 1024]> hidden_states_47 = mul(x = hidden_states_43, y = var_948)[name = string("hidden_states_47")];
            tensor<fp32, [1024]> const_20 = const()[name = string("const_20"), val = tensor<fp32, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110198016)))];
            tensor<fp32, [1, 1, 1024]> input_15 = mul(x = const_20, y = hidden_states_47)[name = string("input_15")];
            tensor<fp32, [1, 1, 3072]> input_17 = linear(bias = linear_4_bias_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_15)[name = string("linear_11")];
            tensor<fp32, [1, 1, 3072]> var_958 = silu(x = input_17)[name = string("op_958")];
            tensor<fp32, [1, 1, 3072]> var_960 = linear(bias = linear_4_bias_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_15)[name = string("linear_12")];
            tensor<fp32, [1, 1, 3072]> input_19 = mul(x = var_958, y = var_960)[name = string("input_19")];
            tensor<fp32, [1, 1, 1024]> mlp_out_3 = linear(bias = linear_1_bias_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_19)[name = string("linear_13")];
            tensor<int32, [1]> var_965_axes_0 = const()[name = string("op_965_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_965 = squeeze(axes = var_965_axes_0, x = mlp_out_3)[name = string("op_965")];
            tensor<int32, [1]> var_967_axes_0 = const()[name = string("op_967_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_967 = squeeze(axes = var_967_axes_0, x = var_965)[name = string("op_967")];
            tensor<int32, [1]> var_969_axes_0 = const()[name = string("op_969_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_969 = expand_dims(axes = var_969_axes_0, x = var_967)[name = string("op_969")];
            tensor<int32, [1]> mlp_4d_3_axes_0 = const()[name = string("mlp_4d_3_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> mlp_4d_3 = expand_dims(axes = mlp_4d_3_axes_0, x = var_969)[name = string("mlp_4d_3")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_7 = add(x = hidden_5, y = mlp_4d_3)[name = string("hidden_7")];
            tensor<int32, [4]> var_983_begin_0 = const()[name = string("op_983_begin_0"), val = tensor<int32, [4]>([0, 2048, 0, 0])];
            tensor<int32, [4]> var_983_end_0 = const()[name = string("op_983_end_0"), val = tensor<int32, [4]>([1, 3072, 1, 16])];
            tensor<bool, [4]> var_983_end_mask_0 = const()[name = string("op_983_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp32, [1, 1024, 1, 16]> var_983 = slice_by_index(begin = var_983_begin_0, end = var_983_end_0, end_mask = var_983_end_mask_0, x = cast_1)[name = string("op_983")];
            tensor<int32, [4]> var_1003_begin_0 = const()[name = string("op_1003_begin_0"), val = tensor<int32, [4]>([0, 2048, 0, 0])];
            tensor<int32, [4]> var_1003_end_0 = const()[name = string("op_1003_end_0"), val = tensor<int32, [4]>([1, 3072, 1, 16])];
            tensor<bool, [4]> var_1003_end_mask_0 = const()[name = string("op_1003_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp32, [1, 1024, 1, 16]> var_1003 = slice_by_index(begin = var_1003_begin_0, end = var_1003_end_0, end_mask = var_1003_end_mask_0, x = cast_4)[name = string("op_1003")];
            tensor<int32, [1]> var_1015_axes_0 = const()[name = string("op_1015_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_1015 = squeeze(axes = var_1015_axes_0, x = hidden_7)[name = string("op_1015")];
            tensor<int32, [1]> var_1017_axes_0 = const()[name = string("op_1017_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_1017 = squeeze(axes = var_1017_axes_0, x = var_1015)[name = string("op_1017")];
            tensor<int32, [1]> hidden_states_49_axes_0 = const()[name = string("hidden_states_49_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_49 = expand_dims(axes = hidden_states_49_axes_0, x = var_1017)[name = string("hidden_states_49")];
            fp32 var_1023_promoted = const()[name = string("op_1023_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_1029 = pow(x = hidden_states_49, y = var_1023_promoted)[name = string("op_1029")];
            tensor<int32, [1]> variance_17_axes_0 = const()[name = string("variance_17_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_17_keep_dims_0 = const()[name = string("variance_17_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 1]> variance_17 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_1029)[name = string("variance_17")];
            fp32 var_1032 = const()[name = string("op_1032"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_1033 = add(x = variance_17, y = var_1032)[name = string("op_1033")];
            fp32 var_1034_epsilon_0 = const()[name = string("op_1034_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_1034 = rsqrt(epsilon = var_1034_epsilon_0, x = var_1033)[name = string("op_1034")];
            tensor<fp32, [1, 1, 1024]> hidden_states_53 = mul(x = hidden_states_49, y = var_1034)[name = string("hidden_states_53")];
            tensor<fp32, [1024]> const_21 = const()[name = string("const_21"), val = tensor<fp32, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110202176)))];
            tensor<fp32, [1, 1, 1024]> input_21 = mul(x = const_21, y = hidden_states_53)[name = string("input_21")];
            tensor<fp32, [1, 1, 2048]> var_1040 = linear(bias = linear_0_bias_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = input_21)[name = string("linear_14")];
            tensor<int32, [4]> var_1045 = const()[name = string("op_1045"), val = tensor<int32, [4]>([1, 1, 16, 128])];
            tensor<fp32, [1, 1, 16, 128]> hidden_states_55 = reshape(shape = var_1045, x = var_1040)[name = string("hidden_states_55")];
            tensor<fp32, [1, 1, 1024]> var_1049 = linear(bias = linear_1_bias_0, weight = layers_2_self_attn_k_proj_weight_palettized, x = input_21)[name = string("linear_15")];
            tensor<int32, [4]> var_1054 = const()[name = string("op_1054"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<fp32, [1, 1, 8, 128]> hidden_states_61 = reshape(shape = var_1054, x = var_1049)[name = string("hidden_states_61")];
            tensor<fp32, [1, 1, 1024]> var_1058 = linear(bias = linear_1_bias_0, weight = layers_2_self_attn_v_proj_weight_palettized, x = input_21)[name = string("linear_16")];
            tensor<int32, [4]> var_1063 = const()[name = string("op_1063"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<fp32, [1, 1, 8, 128]> v_9 = reshape(shape = var_1063, x = var_1058)[name = string("v_9")];
            fp32 var_1068_promoted = const()[name = string("op_1068_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 16, 128]> var_1074 = pow(x = hidden_states_55, y = var_1068_promoted)[name = string("op_1074")];
            tensor<int32, [1]> variance_19_axes_0 = const()[name = string("variance_19_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_19_keep_dims_0 = const()[name = string("variance_19_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 16, 1]> variance_19 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = var_1074)[name = string("variance_19")];
            fp32 var_1077 = const()[name = string("op_1077"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 16, 1]> var_1078 = add(x = variance_19, y = var_1077)[name = string("op_1078")];
            fp32 var_1079_epsilon_0 = const()[name = string("op_1079_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 16, 1]> var_1079 = rsqrt(epsilon = var_1079_epsilon_0, x = var_1078)[name = string("op_1079")];
            tensor<fp32, [1, 1, 16, 128]> hidden_states_59 = mul(x = hidden_states_55, y = var_1079)[name = string("hidden_states_59")];
            tensor<fp32, [128]> const_22 = const()[name = string("const_22"), val = tensor<fp32, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110206336)))];
            tensor<fp32, [1, 1, 16, 128]> q_13 = mul(x = const_22, y = hidden_states_59)[name = string("q_13")];
            fp32 var_1086_promoted = const()[name = string("op_1086_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 8, 128]> var_1092 = pow(x = hidden_states_61, y = var_1086_promoted)[name = string("op_1092")];
            tensor<int32, [1]> variance_21_axes_0 = const()[name = string("variance_21_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_21_keep_dims_0 = const()[name = string("variance_21_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 8, 1]> variance_21 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = var_1092)[name = string("variance_21")];
            fp32 var_1095 = const()[name = string("op_1095"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 8, 1]> var_1096 = add(x = variance_21, y = var_1095)[name = string("op_1096")];
            fp32 var_1097_epsilon_0 = const()[name = string("op_1097_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 8, 1]> var_1097 = rsqrt(epsilon = var_1097_epsilon_0, x = var_1096)[name = string("op_1097")];
            tensor<fp32, [1, 1, 8, 128]> hidden_states_65 = mul(x = hidden_states_61, y = var_1097)[name = string("hidden_states_65")];
            tensor<fp32, [128]> const_23 = const()[name = string("const_23"), val = tensor<fp32, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110206912)))];
            tensor<fp32, [1, 1, 8, 128]> k_13 = mul(x = const_23, y = hidden_states_65)[name = string("k_13")];
            tensor<int32, [4]> q_15_perm_0 = const()[name = string("q_15_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [4]> k_15_perm_0 = const()[name = string("k_15_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [4]> v_11_perm_0 = const()[name = string("v_11_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<fp32, [1, 16, 1, 128]> q_15 = transpose(perm = q_15_perm_0, x = q_13)[name = string("transpose_11")];
            tensor<fp32, [1, 16, 1, 128]> var_1114 = mul(x = q_15, y = cos_r_1)[name = string("op_1114")];
            tensor<int32, [4]> x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor<int32, [4]>([1, 16, 1, 64])];
            tensor<bool, [4]> x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 16, 1, 64]> x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_15)[name = string("x1_9")];
            tensor<int32, [4]> x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor<int32, [4]>([1, 16, 1, 128])];
            tensor<bool, [4]> x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 16, 1, 64]> x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_15)[name = string("x2_9")];
            fp32 const_26_promoted = const()[name = string("const_26_promoted"), val = fp32(-0x1p+0)];
            tensor<fp32, [1, 16, 1, 64]> var_1135 = mul(x = x2_9, y = const_26_promoted)[name = string("op_1135")];
            int32 var_1137 = const()[name = string("op_1137"), val = int32(-1)];
            bool var_1138_interleave_0 = const()[name = string("op_1138_interleave_0"), val = bool(false)];
            tensor<fp32, [1, 16, 1, 128]> var_1138 = concat(axis = var_1137, interleave = var_1138_interleave_0, values = (var_1135, x1_9))[name = string("op_1138")];
            tensor<fp32, [1, 16, 1, 128]> var_1139 = mul(x = var_1138, y = sin_r_1)[name = string("op_1139")];
            tensor<fp32, [1, 16, 1, 128]> q_17 = add(x = var_1114, y = var_1139)[name = string("q_17")];
            tensor<fp32, [1, 8, 1, 128]> k_15 = transpose(perm = k_15_perm_0, x = k_13)[name = string("transpose_10")];
            tensor<fp32, [1, 8, 1, 128]> var_1142 = mul(x = k_15, y = cos_r_1)[name = string("op_1142")];
            tensor<int32, [4]> x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
            tensor<bool, [4]> x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 8, 1, 64]> x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_15)[name = string("x1_11")];
            tensor<int32, [4]> x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<bool, [4]> x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 8, 1, 64]> x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_15)[name = string("x2_11")];
            fp32 const_29_promoted = const()[name = string("const_29_promoted"), val = fp32(-0x1p+0)];
            tensor<fp32, [1, 8, 1, 64]> var_1163 = mul(x = x2_11, y = const_29_promoted)[name = string("op_1163")];
            int32 var_1165 = const()[name = string("op_1165"), val = int32(-1)];
            bool var_1166_interleave_0 = const()[name = string("op_1166_interleave_0"), val = bool(false)];
            tensor<fp32, [1, 8, 1, 128]> var_1166 = concat(axis = var_1165, interleave = var_1166_interleave_0, values = (var_1163, x1_11))[name = string("op_1166")];
            tensor<fp32, [1, 8, 1, 128]> var_1167 = mul(x = var_1166, y = sin_r_1)[name = string("op_1167")];
            tensor<fp32, [1, 8, 1, 128]> k_17 = add(x = var_1142, y = var_1167)[name = string("k_17")];
            tensor<int32, [4]> var_1174 = const()[name = string("op_1174"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 1024, 1, 1]> nk_flat_5 = reshape(shape = var_1174, x = k_17)[name = string("nk_flat_5")];
            tensor<int32, [4]> var_1180 = const()[name = string("op_1180"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 8, 1, 128]> v_11 = transpose(perm = v_11_perm_0, x = v_9)[name = string("transpose_9")];
            tensor<fp32, [1, 1024, 1, 1]> nv_flat_5 = reshape(shape = var_1180, x = v_11)[name = string("nv_flat_5")];
            tensor<fp32, [1, 1024, 1, 16]> var_1189 = mul(x = var_983, y = var_464)[name = string("op_1189")];
            tensor<fp32, [1, 1024, 1, 16]> var_1190 = mul(x = nk_flat_5, y = update_mask_1)[name = string("op_1190")];
            tensor<fp32, [1, 1024, 1, 16]> key_cache_13 = add(x = var_1189, y = var_1190)[name = string("key_cache_13")];
            tensor<fp32, [1, 1024, 1, 16]> var_1196 = mul(x = var_1003, y = var_464)[name = string("op_1196")];
            tensor<fp32, [1, 1024, 1, 16]> var_1197 = mul(x = nv_flat_5, y = update_mask_1)[name = string("op_1197")];
            tensor<fp32, [1, 1024, 1, 16]> value_cache_13 = add(x = var_1196, y = var_1197)[name = string("value_cache_13")];
            tensor<int32, [1]> var_1201_axes_0 = const()[name = string("op_1201_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_1201 = squeeze(axes = var_1201_axes_0, x = key_cache_13)[name = string("op_1201")];
            tensor<int32, [4]> var_1206 = const()[name = string("op_1206"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> kc_9 = reshape(shape = var_1206, x = var_1201)[name = string("kc_9")];
            tensor<int32, [1]> var_1209_axes_0 = const()[name = string("op_1209_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_1209 = squeeze(axes = var_1209_axes_0, x = value_cache_13)[name = string("op_1209")];
            tensor<int32, [4]> var_1214 = const()[name = string("op_1214"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> vc_9 = reshape(shape = var_1214, x = var_1209)[name = string("vc_9")];
            tensor<int32, [1]> var_1217_axes_0 = const()[name = string("op_1217_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_1217 = expand_dims(axes = var_1217_axes_0, x = kc_9)[name = string("op_1217")];
            tensor<int32, [5]> var_1225_reps_0 = const()[name = string("op_1225_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_1225 = tile(reps = var_1225_reps_0, x = var_1217)[name = string("op_1225")];
            tensor<int32, [4]> var_1230 = const()[name = string("op_1230"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> kc_11 = reshape(shape = var_1230, x = var_1225)[name = string("kc_11")];
            tensor<int32, [1]> var_1233_axes_0 = const()[name = string("op_1233_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_1233 = expand_dims(axes = var_1233_axes_0, x = vc_9)[name = string("op_1233")];
            tensor<int32, [5]> var_1241_reps_0 = const()[name = string("op_1241_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_1241 = tile(reps = var_1241_reps_0, x = var_1233)[name = string("op_1241")];
            tensor<int32, [4]> var_1246 = const()[name = string("op_1246"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> vc_11 = reshape(shape = var_1246, x = var_1241)[name = string("vc_11")];
            bool var_1248_transpose_x_0 = const()[name = string("op_1248_transpose_x_0"), val = bool(false)];
            bool var_1248_transpose_y_0 = const()[name = string("op_1248_transpose_y_0"), val = bool(false)];
            tensor<fp32, [1, 16, 1, 16]> var_1248 = matmul(transpose_x = var_1248_transpose_x_0, transpose_y = var_1248_transpose_y_0, x = q_17, y = kc_11)[name = string("op_1248")];
            fp32 _inversed_attn_weights_17_y_0 = const()[name = string("_inversed_attn_weights_17_y_0"), val = fp32(0x1.6a09e6p-4)];
            tensor<fp32, [1, 16, 1, 16]> _inversed_attn_weights_17 = mul(x = var_1248, y = _inversed_attn_weights_17_y_0)[name = string("_inversed_attn_weights_17")];
            tensor<fp32, [1, 16, 1, 16]> attn_weights_19 = add(x = _inversed_attn_weights_17, y = mask_1)[name = string("attn_weights_19")];
            int32 var_1262 = const()[name = string("op_1262"), val = int32(-1)];
            tensor<fp32, [1, 16, 1, 16]> attn_weights_23 = softmax(axis = var_1262, x = attn_weights_19)[name = string("attn_weights_23")];
            bool attn_output_9_transpose_x_1 = const()[name = string("attn_output_9_transpose_x_1"), val = bool(false)];
            bool attn_output_9_transpose_y_1 = const()[name = string("attn_output_9_transpose_y_1"), val = bool(true)];
            tensor<fp32, [1, 16, 1, 128]> attn_output_9 = matmul(transpose_x = attn_output_9_transpose_x_1, transpose_y = attn_output_9_transpose_y_1, x = attn_weights_23, y = vc_11)[name = string("attn_output_9")];
            tensor<int32, [4]> var_1271_perm_0 = const()[name = string("op_1271_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1275 = const()[name = string("op_1275"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp32, [1, 1, 16, 128]> var_1271 = transpose(perm = var_1271_perm_0, x = attn_output_9)[name = string("transpose_8")];
            tensor<fp32, [1, 1, 2048]> input_23 = reshape(shape = var_1275, x = var_1271)[name = string("input_23")];
            tensor<fp32, [1, 1, 1024]> attn_output_11 = linear(bias = linear_1_bias_0, weight = layers_2_self_attn_o_proj_weight_palettized, x = input_23)[name = string("linear_17")];
            tensor<int32, [1]> var_1281_axes_0 = const()[name = string("op_1281_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_1281 = squeeze(axes = var_1281_axes_0, x = attn_output_11)[name = string("op_1281")];
            tensor<int32, [1]> var_1283_axes_0 = const()[name = string("op_1283_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_1283 = squeeze(axes = var_1283_axes_0, x = var_1281)[name = string("op_1283")];
            tensor<int32, [1]> var_1285_axes_0 = const()[name = string("op_1285_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_1285 = expand_dims(axes = var_1285_axes_0, x = var_1283)[name = string("op_1285")];
            tensor<int32, [1]> attn_4d_5_axes_0 = const()[name = string("attn_4d_5_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> attn_4d_5 = expand_dims(axes = attn_4d_5_axes_0, x = var_1285)[name = string("attn_4d_5")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_9 = add(x = hidden_7, y = attn_4d_5)[name = string("hidden_9")];
            tensor<int32, [1]> var_1291_axes_0 = const()[name = string("op_1291_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_1291 = squeeze(axes = var_1291_axes_0, x = hidden_9)[name = string("op_1291")];
            tensor<int32, [1]> var_1293_axes_0 = const()[name = string("op_1293_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_1293 = squeeze(axes = var_1293_axes_0, x = var_1291)[name = string("op_1293")];
            tensor<int32, [1]> hidden_states_67_axes_0 = const()[name = string("hidden_states_67_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_67 = expand_dims(axes = hidden_states_67_axes_0, x = var_1293)[name = string("hidden_states_67")];
            fp32 var_1299_promoted = const()[name = string("op_1299_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_1305 = pow(x = hidden_states_67, y = var_1299_promoted)[name = string("op_1305")];
            tensor<int32, [1]> variance_23_axes_0 = const()[name = string("variance_23_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_23_keep_dims_0 = const()[name = string("variance_23_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 1]> variance_23 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_1305)[name = string("variance_23")];
            fp32 var_1308 = const()[name = string("op_1308"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_1309 = add(x = variance_23, y = var_1308)[name = string("op_1309")];
            fp32 var_1310_epsilon_0 = const()[name = string("op_1310_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_1310 = rsqrt(epsilon = var_1310_epsilon_0, x = var_1309)[name = string("op_1310")];
            tensor<fp32, [1, 1, 1024]> hidden_states_71 = mul(x = hidden_states_67, y = var_1310)[name = string("hidden_states_71")];
            tensor<fp32, [1024]> const_30 = const()[name = string("const_30"), val = tensor<fp32, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110207488)))];
            tensor<fp32, [1, 1, 1024]> input_25 = mul(x = const_30, y = hidden_states_71)[name = string("input_25")];
            tensor<fp32, [1, 1, 3072]> input_27 = linear(bias = linear_4_bias_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_25)[name = string("linear_18")];
            tensor<fp32, [1, 1, 3072]> var_1320 = silu(x = input_27)[name = string("op_1320")];
            tensor<fp32, [1, 1, 3072]> var_1322 = linear(bias = linear_4_bias_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_25)[name = string("linear_19")];
            tensor<fp32, [1, 1, 3072]> input_29 = mul(x = var_1320, y = var_1322)[name = string("input_29")];
            tensor<fp32, [1, 1, 1024]> mlp_out_5 = linear(bias = linear_1_bias_0, weight = layers_2_mlp_down_proj_weight_palettized, x = input_29)[name = string("linear_20")];
            tensor<int32, [1]> var_1327_axes_0 = const()[name = string("op_1327_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_1327 = squeeze(axes = var_1327_axes_0, x = mlp_out_5)[name = string("op_1327")];
            tensor<int32, [1]> var_1329_axes_0 = const()[name = string("op_1329_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_1329 = squeeze(axes = var_1329_axes_0, x = var_1327)[name = string("op_1329")];
            tensor<int32, [1]> var_1331_axes_0 = const()[name = string("op_1331_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_1331 = expand_dims(axes = var_1331_axes_0, x = var_1329)[name = string("op_1331")];
            tensor<int32, [1]> mlp_4d_5_axes_0 = const()[name = string("mlp_4d_5_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> mlp_4d_5 = expand_dims(axes = mlp_4d_5_axes_0, x = var_1331)[name = string("mlp_4d_5")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_11 = add(x = hidden_9, y = mlp_4d_5)[name = string("hidden_11")];
            tensor<int32, [4]> var_1345_begin_0 = const()[name = string("op_1345_begin_0"), val = tensor<int32, [4]>([0, 3072, 0, 0])];
            tensor<int32, [4]> var_1345_end_0 = const()[name = string("op_1345_end_0"), val = tensor<int32, [4]>([1, 4096, 1, 16])];
            tensor<bool, [4]> var_1345_end_mask_0 = const()[name = string("op_1345_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp32, [1, 1024, 1, 16]> var_1345 = slice_by_index(begin = var_1345_begin_0, end = var_1345_end_0, end_mask = var_1345_end_mask_0, x = cast_1)[name = string("op_1345")];
            tensor<int32, [4]> var_1365_begin_0 = const()[name = string("op_1365_begin_0"), val = tensor<int32, [4]>([0, 3072, 0, 0])];
            tensor<int32, [4]> var_1365_end_0 = const()[name = string("op_1365_end_0"), val = tensor<int32, [4]>([1, 4096, 1, 16])];
            tensor<bool, [4]> var_1365_end_mask_0 = const()[name = string("op_1365_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp32, [1, 1024, 1, 16]> var_1365 = slice_by_index(begin = var_1365_begin_0, end = var_1365_end_0, end_mask = var_1365_end_mask_0, x = cast_4)[name = string("op_1365")];
            tensor<int32, [1]> var_1377_axes_0 = const()[name = string("op_1377_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_1377 = squeeze(axes = var_1377_axes_0, x = hidden_11)[name = string("op_1377")];
            tensor<int32, [1]> var_1379_axes_0 = const()[name = string("op_1379_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_1379 = squeeze(axes = var_1379_axes_0, x = var_1377)[name = string("op_1379")];
            tensor<int32, [1]> hidden_states_73_axes_0 = const()[name = string("hidden_states_73_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_73 = expand_dims(axes = hidden_states_73_axes_0, x = var_1379)[name = string("hidden_states_73")];
            fp32 var_1385_promoted = const()[name = string("op_1385_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_1391 = pow(x = hidden_states_73, y = var_1385_promoted)[name = string("op_1391")];
            tensor<int32, [1]> variance_25_axes_0 = const()[name = string("variance_25_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_25_keep_dims_0 = const()[name = string("variance_25_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 1]> variance_25 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_1391)[name = string("variance_25")];
            fp32 var_1394 = const()[name = string("op_1394"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_1395 = add(x = variance_25, y = var_1394)[name = string("op_1395")];
            fp32 var_1396_epsilon_0 = const()[name = string("op_1396_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_1396 = rsqrt(epsilon = var_1396_epsilon_0, x = var_1395)[name = string("op_1396")];
            tensor<fp32, [1, 1, 1024]> hidden_states_77 = mul(x = hidden_states_73, y = var_1396)[name = string("hidden_states_77")];
            tensor<fp32, [1024]> const_31 = const()[name = string("const_31"), val = tensor<fp32, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110211648)))];
            tensor<fp32, [1, 1, 1024]> input_31 = mul(x = const_31, y = hidden_states_77)[name = string("input_31")];
            tensor<fp32, [1, 1, 2048]> var_1402 = linear(bias = linear_0_bias_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = input_31)[name = string("linear_21")];
            tensor<int32, [4]> var_1407 = const()[name = string("op_1407"), val = tensor<int32, [4]>([1, 1, 16, 128])];
            tensor<fp32, [1, 1, 16, 128]> hidden_states_79 = reshape(shape = var_1407, x = var_1402)[name = string("hidden_states_79")];
            tensor<fp32, [1, 1, 1024]> var_1411 = linear(bias = linear_1_bias_0, weight = layers_3_self_attn_k_proj_weight_palettized, x = input_31)[name = string("linear_22")];
            tensor<int32, [4]> var_1416 = const()[name = string("op_1416"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<fp32, [1, 1, 8, 128]> hidden_states_85 = reshape(shape = var_1416, x = var_1411)[name = string("hidden_states_85")];
            tensor<fp32, [1, 1, 1024]> var_1420 = linear(bias = linear_1_bias_0, weight = layers_3_self_attn_v_proj_weight_palettized, x = input_31)[name = string("linear_23")];
            tensor<int32, [4]> var_1425 = const()[name = string("op_1425"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<fp32, [1, 1, 8, 128]> v_13 = reshape(shape = var_1425, x = var_1420)[name = string("v_13")];
            fp32 var_1430_promoted = const()[name = string("op_1430_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 16, 128]> var_1436 = pow(x = hidden_states_79, y = var_1430_promoted)[name = string("op_1436")];
            tensor<int32, [1]> variance_27_axes_0 = const()[name = string("variance_27_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_27_keep_dims_0 = const()[name = string("variance_27_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 16, 1]> variance_27 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = var_1436)[name = string("variance_27")];
            fp32 var_1439 = const()[name = string("op_1439"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 16, 1]> var_1440 = add(x = variance_27, y = var_1439)[name = string("op_1440")];
            fp32 var_1441_epsilon_0 = const()[name = string("op_1441_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 16, 1]> var_1441 = rsqrt(epsilon = var_1441_epsilon_0, x = var_1440)[name = string("op_1441")];
            tensor<fp32, [1, 1, 16, 128]> hidden_states_83 = mul(x = hidden_states_79, y = var_1441)[name = string("hidden_states_83")];
            tensor<fp32, [128]> const_32 = const()[name = string("const_32"), val = tensor<fp32, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110215808)))];
            tensor<fp32, [1, 1, 16, 128]> q_19 = mul(x = const_32, y = hidden_states_83)[name = string("q_19")];
            fp32 var_1448_promoted = const()[name = string("op_1448_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 8, 128]> var_1454 = pow(x = hidden_states_85, y = var_1448_promoted)[name = string("op_1454")];
            tensor<int32, [1]> variance_29_axes_0 = const()[name = string("variance_29_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_29_keep_dims_0 = const()[name = string("variance_29_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 8, 1]> variance_29 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = var_1454)[name = string("variance_29")];
            fp32 var_1457 = const()[name = string("op_1457"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 8, 1]> var_1458 = add(x = variance_29, y = var_1457)[name = string("op_1458")];
            fp32 var_1459_epsilon_0 = const()[name = string("op_1459_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 8, 1]> var_1459 = rsqrt(epsilon = var_1459_epsilon_0, x = var_1458)[name = string("op_1459")];
            tensor<fp32, [1, 1, 8, 128]> hidden_states_89 = mul(x = hidden_states_85, y = var_1459)[name = string("hidden_states_89")];
            tensor<fp32, [128]> const_33 = const()[name = string("const_33"), val = tensor<fp32, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110216384)))];
            tensor<fp32, [1, 1, 8, 128]> k_19 = mul(x = const_33, y = hidden_states_89)[name = string("k_19")];
            tensor<int32, [4]> q_21_perm_0 = const()[name = string("q_21_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [4]> k_21_perm_0 = const()[name = string("k_21_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [4]> v_15_perm_0 = const()[name = string("v_15_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<fp32, [1, 16, 1, 128]> q_21 = transpose(perm = q_21_perm_0, x = q_19)[name = string("transpose_7")];
            tensor<fp32, [1, 16, 1, 128]> var_1476 = mul(x = q_21, y = cos_r_1)[name = string("op_1476")];
            tensor<int32, [4]> x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor<int32, [4]>([1, 16, 1, 64])];
            tensor<bool, [4]> x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 16, 1, 64]> x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_21)[name = string("x1_13")];
            tensor<int32, [4]> x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor<int32, [4]>([1, 16, 1, 128])];
            tensor<bool, [4]> x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 16, 1, 64]> x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_21)[name = string("x2_13")];
            fp32 const_36_promoted = const()[name = string("const_36_promoted"), val = fp32(-0x1p+0)];
            tensor<fp32, [1, 16, 1, 64]> var_1497 = mul(x = x2_13, y = const_36_promoted)[name = string("op_1497")];
            int32 var_1499 = const()[name = string("op_1499"), val = int32(-1)];
            bool var_1500_interleave_0 = const()[name = string("op_1500_interleave_0"), val = bool(false)];
            tensor<fp32, [1, 16, 1, 128]> var_1500 = concat(axis = var_1499, interleave = var_1500_interleave_0, values = (var_1497, x1_13))[name = string("op_1500")];
            tensor<fp32, [1, 16, 1, 128]> var_1501 = mul(x = var_1500, y = sin_r_1)[name = string("op_1501")];
            tensor<fp32, [1, 16, 1, 128]> q_23 = add(x = var_1476, y = var_1501)[name = string("q_23")];
            tensor<fp32, [1, 8, 1, 128]> k_21 = transpose(perm = k_21_perm_0, x = k_19)[name = string("transpose_6")];
            tensor<fp32, [1, 8, 1, 128]> var_1504 = mul(x = k_21, y = cos_r_1)[name = string("op_1504")];
            tensor<int32, [4]> x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
            tensor<bool, [4]> x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 8, 1, 64]> x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_21)[name = string("x1_15")];
            tensor<int32, [4]> x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<bool, [4]> x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 8, 1, 64]> x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_21)[name = string("x2_15")];
            fp32 const_39_promoted = const()[name = string("const_39_promoted"), val = fp32(-0x1p+0)];
            tensor<fp32, [1, 8, 1, 64]> var_1525 = mul(x = x2_15, y = const_39_promoted)[name = string("op_1525")];
            int32 var_1527 = const()[name = string("op_1527"), val = int32(-1)];
            bool var_1528_interleave_0 = const()[name = string("op_1528_interleave_0"), val = bool(false)];
            tensor<fp32, [1, 8, 1, 128]> var_1528 = concat(axis = var_1527, interleave = var_1528_interleave_0, values = (var_1525, x1_15))[name = string("op_1528")];
            tensor<fp32, [1, 8, 1, 128]> var_1529 = mul(x = var_1528, y = sin_r_1)[name = string("op_1529")];
            tensor<fp32, [1, 8, 1, 128]> k_23 = add(x = var_1504, y = var_1529)[name = string("k_23")];
            tensor<int32, [4]> var_1536 = const()[name = string("op_1536"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 1024, 1, 1]> nk_flat_7 = reshape(shape = var_1536, x = k_23)[name = string("nk_flat_7")];
            tensor<int32, [4]> var_1542 = const()[name = string("op_1542"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 8, 1, 128]> v_15 = transpose(perm = v_15_perm_0, x = v_13)[name = string("transpose_5")];
            tensor<fp32, [1, 1024, 1, 1]> nv_flat_7 = reshape(shape = var_1542, x = v_15)[name = string("nv_flat_7")];
            tensor<fp32, [1, 1024, 1, 16]> var_1551 = mul(x = var_1345, y = var_464)[name = string("op_1551")];
            tensor<fp32, [1, 1024, 1, 16]> var_1552 = mul(x = nk_flat_7, y = update_mask_1)[name = string("op_1552")];
            tensor<fp32, [1, 1024, 1, 16]> key_cache_17 = add(x = var_1551, y = var_1552)[name = string("key_cache_17")];
            tensor<fp32, [1, 1024, 1, 16]> var_1558 = mul(x = var_1365, y = var_464)[name = string("op_1558")];
            tensor<fp32, [1, 1024, 1, 16]> var_1559 = mul(x = nv_flat_7, y = update_mask_1)[name = string("op_1559")];
            tensor<fp32, [1, 1024, 1, 16]> value_cache_17 = add(x = var_1558, y = var_1559)[name = string("value_cache_17")];
            tensor<int32, [1]> var_1563_axes_0 = const()[name = string("op_1563_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_1563 = squeeze(axes = var_1563_axes_0, x = key_cache_17)[name = string("op_1563")];
            tensor<int32, [4]> var_1568 = const()[name = string("op_1568"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> kc_13 = reshape(shape = var_1568, x = var_1563)[name = string("kc_13")];
            tensor<int32, [1]> var_1571_axes_0 = const()[name = string("op_1571_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_1571 = squeeze(axes = var_1571_axes_0, x = value_cache_17)[name = string("op_1571")];
            tensor<int32, [4]> var_1576 = const()[name = string("op_1576"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> vc_13 = reshape(shape = var_1576, x = var_1571)[name = string("vc_13")];
            tensor<int32, [1]> var_1579_axes_0 = const()[name = string("op_1579_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_1579 = expand_dims(axes = var_1579_axes_0, x = kc_13)[name = string("op_1579")];
            tensor<int32, [5]> var_1587_reps_0 = const()[name = string("op_1587_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_1587 = tile(reps = var_1587_reps_0, x = var_1579)[name = string("op_1587")];
            tensor<int32, [4]> var_1592 = const()[name = string("op_1592"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> kc_15 = reshape(shape = var_1592, x = var_1587)[name = string("kc_15")];
            tensor<int32, [1]> var_1595_axes_0 = const()[name = string("op_1595_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_1595 = expand_dims(axes = var_1595_axes_0, x = vc_13)[name = string("op_1595")];
            tensor<int32, [5]> var_1603_reps_0 = const()[name = string("op_1603_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_1603 = tile(reps = var_1603_reps_0, x = var_1595)[name = string("op_1603")];
            tensor<int32, [4]> var_1608 = const()[name = string("op_1608"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> vc_15 = reshape(shape = var_1608, x = var_1603)[name = string("vc_15")];
            bool var_1610_transpose_x_0 = const()[name = string("op_1610_transpose_x_0"), val = bool(false)];
            bool var_1610_transpose_y_0 = const()[name = string("op_1610_transpose_y_0"), val = bool(false)];
            tensor<fp32, [1, 16, 1, 16]> var_1610 = matmul(transpose_x = var_1610_transpose_x_0, transpose_y = var_1610_transpose_y_0, x = q_23, y = kc_15)[name = string("op_1610")];
            fp32 _inversed_attn_weights_25_y_0 = const()[name = string("_inversed_attn_weights_25_y_0"), val = fp32(0x1.6a09e6p-4)];
            tensor<fp32, [1, 16, 1, 16]> _inversed_attn_weights_25 = mul(x = var_1610, y = _inversed_attn_weights_25_y_0)[name = string("_inversed_attn_weights_25")];
            tensor<fp32, [1, 16, 1, 16]> attn_weights_27 = add(x = _inversed_attn_weights_25, y = mask_1)[name = string("attn_weights_27")];
            int32 var_1624 = const()[name = string("op_1624"), val = int32(-1)];
            tensor<fp32, [1, 16, 1, 16]> attn_weights_31 = softmax(axis = var_1624, x = attn_weights_27)[name = string("attn_weights_31")];
            bool attn_output_13_transpose_x_1 = const()[name = string("attn_output_13_transpose_x_1"), val = bool(false)];
            bool attn_output_13_transpose_y_1 = const()[name = string("attn_output_13_transpose_y_1"), val = bool(true)];
            tensor<fp32, [1, 16, 1, 128]> attn_output_13 = matmul(transpose_x = attn_output_13_transpose_x_1, transpose_y = attn_output_13_transpose_y_1, x = attn_weights_31, y = vc_15)[name = string("attn_output_13")];
            tensor<int32, [4]> var_1633_perm_0 = const()[name = string("op_1633_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1637 = const()[name = string("op_1637"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp32, [1, 1, 16, 128]> var_1633 = transpose(perm = var_1633_perm_0, x = attn_output_13)[name = string("transpose_4")];
            tensor<fp32, [1, 1, 2048]> input_33 = reshape(shape = var_1637, x = var_1633)[name = string("input_33")];
            tensor<fp32, [1, 1, 1024]> attn_output_15 = linear(bias = linear_1_bias_0, weight = layers_3_self_attn_o_proj_weight_palettized, x = input_33)[name = string("linear_24")];
            tensor<int32, [1]> var_1643_axes_0 = const()[name = string("op_1643_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_1643 = squeeze(axes = var_1643_axes_0, x = attn_output_15)[name = string("op_1643")];
            tensor<int32, [1]> var_1645_axes_0 = const()[name = string("op_1645_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_1645 = squeeze(axes = var_1645_axes_0, x = var_1643)[name = string("op_1645")];
            tensor<int32, [1]> var_1647_axes_0 = const()[name = string("op_1647_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_1647 = expand_dims(axes = var_1647_axes_0, x = var_1645)[name = string("op_1647")];
            tensor<int32, [1]> attn_4d_7_axes_0 = const()[name = string("attn_4d_7_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> attn_4d_7 = expand_dims(axes = attn_4d_7_axes_0, x = var_1647)[name = string("attn_4d_7")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_13 = add(x = hidden_11, y = attn_4d_7)[name = string("hidden_13")];
            tensor<int32, [1]> var_1653_axes_0 = const()[name = string("op_1653_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_1653 = squeeze(axes = var_1653_axes_0, x = hidden_13)[name = string("op_1653")];
            tensor<int32, [1]> var_1655_axes_0 = const()[name = string("op_1655_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_1655 = squeeze(axes = var_1655_axes_0, x = var_1653)[name = string("op_1655")];
            tensor<int32, [1]> hidden_states_91_axes_0 = const()[name = string("hidden_states_91_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_91 = expand_dims(axes = hidden_states_91_axes_0, x = var_1655)[name = string("hidden_states_91")];
            fp32 var_1661_promoted = const()[name = string("op_1661_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_1667 = pow(x = hidden_states_91, y = var_1661_promoted)[name = string("op_1667")];
            tensor<int32, [1]> variance_31_axes_0 = const()[name = string("variance_31_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_31_keep_dims_0 = const()[name = string("variance_31_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 1]> variance_31 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_1667)[name = string("variance_31")];
            fp32 var_1670 = const()[name = string("op_1670"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_1671 = add(x = variance_31, y = var_1670)[name = string("op_1671")];
            fp32 var_1672_epsilon_0 = const()[name = string("op_1672_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_1672 = rsqrt(epsilon = var_1672_epsilon_0, x = var_1671)[name = string("op_1672")];
            tensor<fp32, [1, 1, 1024]> hidden_states_95 = mul(x = hidden_states_91, y = var_1672)[name = string("hidden_states_95")];
            tensor<fp32, [1024]> const_40 = const()[name = string("const_40"), val = tensor<fp32, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110216960)))];
            tensor<fp32, [1, 1, 1024]> input_35 = mul(x = const_40, y = hidden_states_95)[name = string("input_35")];
            tensor<fp32, [1, 1, 3072]> input_37 = linear(bias = linear_4_bias_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_35)[name = string("linear_25")];
            tensor<fp32, [1, 1, 3072]> var_1682 = silu(x = input_37)[name = string("op_1682")];
            tensor<fp32, [1, 1, 3072]> var_1684 = linear(bias = linear_4_bias_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_35)[name = string("linear_26")];
            tensor<fp32, [1, 1, 3072]> input_39 = mul(x = var_1682, y = var_1684)[name = string("input_39")];
            tensor<fp32, [1, 1, 1024]> mlp_out_7 = linear(bias = linear_1_bias_0, weight = layers_3_mlp_down_proj_weight_palettized, x = input_39)[name = string("linear_27")];
            tensor<int32, [1]> var_1689_axes_0 = const()[name = string("op_1689_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_1689 = squeeze(axes = var_1689_axes_0, x = mlp_out_7)[name = string("op_1689")];
            tensor<int32, [1]> var_1691_axes_0 = const()[name = string("op_1691_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_1691 = squeeze(axes = var_1691_axes_0, x = var_1689)[name = string("op_1691")];
            tensor<int32, [1]> var_1693_axes_0 = const()[name = string("op_1693_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_1693 = expand_dims(axes = var_1693_axes_0, x = var_1691)[name = string("op_1693")];
            tensor<int32, [1]> mlp_4d_7_axes_0 = const()[name = string("mlp_4d_7_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> mlp_4d_7 = expand_dims(axes = mlp_4d_7_axes_0, x = var_1693)[name = string("mlp_4d_7")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_15 = add(x = hidden_13, y = mlp_4d_7)[name = string("hidden_15")];
            tensor<int32, [4]> var_1707_begin_0 = const()[name = string("op_1707_begin_0"), val = tensor<int32, [4]>([0, 4096, 0, 0])];
            tensor<int32, [4]> var_1707_end_0 = const()[name = string("op_1707_end_0"), val = tensor<int32, [4]>([1, 1, 1, 16])];
            tensor<bool, [4]> var_1707_end_mask_0 = const()[name = string("op_1707_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 1024, 1, 16]> var_1707 = slice_by_index(begin = var_1707_begin_0, end = var_1707_end_0, end_mask = var_1707_end_mask_0, x = cast_1)[name = string("op_1707")];
            tensor<int32, [4]> var_1727_begin_0 = const()[name = string("op_1727_begin_0"), val = tensor<int32, [4]>([0, 4096, 0, 0])];
            tensor<int32, [4]> var_1727_end_0 = const()[name = string("op_1727_end_0"), val = tensor<int32, [4]>([1, 1, 1, 16])];
            tensor<bool, [4]> var_1727_end_mask_0 = const()[name = string("op_1727_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 1024, 1, 16]> var_1727 = slice_by_index(begin = var_1727_begin_0, end = var_1727_end_0, end_mask = var_1727_end_mask_0, x = cast_4)[name = string("op_1727")];
            tensor<int32, [1]> var_1739_axes_0 = const()[name = string("op_1739_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_1739 = squeeze(axes = var_1739_axes_0, x = hidden_15)[name = string("op_1739")];
            tensor<int32, [1]> var_1741_axes_0 = const()[name = string("op_1741_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_1741 = squeeze(axes = var_1741_axes_0, x = var_1739)[name = string("op_1741")];
            tensor<int32, [1]> hidden_states_97_axes_0 = const()[name = string("hidden_states_97_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_97 = expand_dims(axes = hidden_states_97_axes_0, x = var_1741)[name = string("hidden_states_97")];
            fp32 var_1747_promoted = const()[name = string("op_1747_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_1753 = pow(x = hidden_states_97, y = var_1747_promoted)[name = string("op_1753")];
            tensor<int32, [1]> variance_33_axes_0 = const()[name = string("variance_33_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_33_keep_dims_0 = const()[name = string("variance_33_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 1]> variance_33 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_1753)[name = string("variance_33")];
            fp32 var_1756 = const()[name = string("op_1756"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_1757 = add(x = variance_33, y = var_1756)[name = string("op_1757")];
            fp32 var_1758_epsilon_0 = const()[name = string("op_1758_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_1758 = rsqrt(epsilon = var_1758_epsilon_0, x = var_1757)[name = string("op_1758")];
            tensor<fp32, [1, 1, 1024]> hidden_states_101 = mul(x = hidden_states_97, y = var_1758)[name = string("hidden_states_101")];
            tensor<fp32, [1024]> const_41 = const()[name = string("const_41"), val = tensor<fp32, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110221120)))];
            tensor<fp32, [1, 1, 1024]> input_41 = mul(x = const_41, y = hidden_states_101)[name = string("input_41")];
            tensor<fp32, [1, 1, 2048]> var_1764 = linear(bias = linear_0_bias_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = input_41)[name = string("linear_28")];
            tensor<int32, [4]> var_1769 = const()[name = string("op_1769"), val = tensor<int32, [4]>([1, 1, 16, 128])];
            tensor<fp32, [1, 1, 16, 128]> hidden_states_103 = reshape(shape = var_1769, x = var_1764)[name = string("hidden_states_103")];
            tensor<fp32, [1, 1, 1024]> var_1773 = linear(bias = linear_1_bias_0, weight = layers_4_self_attn_k_proj_weight_palettized, x = input_41)[name = string("linear_29")];
            tensor<int32, [4]> var_1778 = const()[name = string("op_1778"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<fp32, [1, 1, 8, 128]> hidden_states_109 = reshape(shape = var_1778, x = var_1773)[name = string("hidden_states_109")];
            tensor<fp32, [1, 1, 1024]> var_1782 = linear(bias = linear_1_bias_0, weight = layers_4_self_attn_v_proj_weight_palettized, x = input_41)[name = string("linear_30")];
            tensor<int32, [4]> var_1787 = const()[name = string("op_1787"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<fp32, [1, 1, 8, 128]> v_17 = reshape(shape = var_1787, x = var_1782)[name = string("v_17")];
            fp32 var_1792_promoted = const()[name = string("op_1792_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 16, 128]> var_1798 = pow(x = hidden_states_103, y = var_1792_promoted)[name = string("op_1798")];
            tensor<int32, [1]> variance_35_axes_0 = const()[name = string("variance_35_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_35_keep_dims_0 = const()[name = string("variance_35_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 16, 1]> variance_35 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = var_1798)[name = string("variance_35")];
            fp32 var_1801 = const()[name = string("op_1801"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 16, 1]> var_1802 = add(x = variance_35, y = var_1801)[name = string("op_1802")];
            fp32 var_1803_epsilon_0 = const()[name = string("op_1803_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 16, 1]> var_1803 = rsqrt(epsilon = var_1803_epsilon_0, x = var_1802)[name = string("op_1803")];
            tensor<fp32, [1, 1, 16, 128]> hidden_states_107 = mul(x = hidden_states_103, y = var_1803)[name = string("hidden_states_107")];
            tensor<fp32, [128]> const_42 = const()[name = string("const_42"), val = tensor<fp32, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110225280)))];
            tensor<fp32, [1, 1, 16, 128]> q_25 = mul(x = const_42, y = hidden_states_107)[name = string("q_25")];
            fp32 var_1810_promoted = const()[name = string("op_1810_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 8, 128]> var_1816 = pow(x = hidden_states_109, y = var_1810_promoted)[name = string("op_1816")];
            tensor<int32, [1]> variance_37_axes_0 = const()[name = string("variance_37_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_37_keep_dims_0 = const()[name = string("variance_37_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 8, 1]> variance_37 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = var_1816)[name = string("variance_37")];
            fp32 var_1819 = const()[name = string("op_1819"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 8, 1]> var_1820 = add(x = variance_37, y = var_1819)[name = string("op_1820")];
            fp32 var_1821_epsilon_0 = const()[name = string("op_1821_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 8, 1]> var_1821 = rsqrt(epsilon = var_1821_epsilon_0, x = var_1820)[name = string("op_1821")];
            tensor<fp32, [1, 1, 8, 128]> hidden_states_113 = mul(x = hidden_states_109, y = var_1821)[name = string("hidden_states_113")];
            tensor<fp32, [128]> const_43 = const()[name = string("const_43"), val = tensor<fp32, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110225856)))];
            tensor<fp32, [1, 1, 8, 128]> k_25 = mul(x = const_43, y = hidden_states_113)[name = string("k_25")];
            tensor<int32, [4]> q_27_perm_0 = const()[name = string("q_27_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [4]> k_27_perm_0 = const()[name = string("k_27_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [4]> v_perm_0 = const()[name = string("v_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<fp32, [1, 16, 1, 128]> q_27 = transpose(perm = q_27_perm_0, x = q_25)[name = string("transpose_3")];
            tensor<fp32, [1, 16, 1, 128]> var_1838 = mul(x = q_27, y = cos_r_1)[name = string("op_1838")];
            tensor<int32, [4]> x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor<int32, [4]>([1, 16, 1, 64])];
            tensor<bool, [4]> x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 16, 1, 64]> x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_27)[name = string("x1_17")];
            tensor<int32, [4]> x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor<int32, [4]>([1, 16, 1, 128])];
            tensor<bool, [4]> x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 16, 1, 64]> x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_27)[name = string("x2_17")];
            fp32 const_46_promoted = const()[name = string("const_46_promoted"), val = fp32(-0x1p+0)];
            tensor<fp32, [1, 16, 1, 64]> var_1859 = mul(x = x2_17, y = const_46_promoted)[name = string("op_1859")];
            int32 var_1861 = const()[name = string("op_1861"), val = int32(-1)];
            bool var_1862_interleave_0 = const()[name = string("op_1862_interleave_0"), val = bool(false)];
            tensor<fp32, [1, 16, 1, 128]> var_1862 = concat(axis = var_1861, interleave = var_1862_interleave_0, values = (var_1859, x1_17))[name = string("op_1862")];
            tensor<fp32, [1, 16, 1, 128]> var_1863 = mul(x = var_1862, y = sin_r_1)[name = string("op_1863")];
            tensor<fp32, [1, 16, 1, 128]> q = add(x = var_1838, y = var_1863)[name = string("q")];
            tensor<fp32, [1, 8, 1, 128]> k_27 = transpose(perm = k_27_perm_0, x = k_25)[name = string("transpose_2")];
            tensor<fp32, [1, 8, 1, 128]> var_1866 = mul(x = k_27, y = cos_r_1)[name = string("op_1866")];
            tensor<int32, [4]> x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_end_0 = const()[name = string("x1_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
            tensor<bool, [4]> x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 8, 1, 64]> x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_27)[name = string("x1")];
            tensor<int32, [4]> x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_end_0 = const()[name = string("x2_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<bool, [4]> x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 8, 1, 64]> x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_27)[name = string("x2")];
            fp32 const_49_promoted = const()[name = string("const_49_promoted"), val = fp32(-0x1p+0)];
            tensor<fp32, [1, 8, 1, 64]> var_1887 = mul(x = x2, y = const_49_promoted)[name = string("op_1887")];
            int32 var_1889 = const()[name = string("op_1889"), val = int32(-1)];
            bool var_1890_interleave_0 = const()[name = string("op_1890_interleave_0"), val = bool(false)];
            tensor<fp32, [1, 8, 1, 128]> var_1890 = concat(axis = var_1889, interleave = var_1890_interleave_0, values = (var_1887, x1))[name = string("op_1890")];
            tensor<fp32, [1, 8, 1, 128]> var_1891 = mul(x = var_1890, y = sin_r_1)[name = string("op_1891")];
            tensor<fp32, [1, 8, 1, 128]> k = add(x = var_1866, y = var_1891)[name = string("k")];
            tensor<int32, [4]> var_1898 = const()[name = string("op_1898"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 1024, 1, 1]> nk_flat = reshape(shape = var_1898, x = k)[name = string("nk_flat")];
            tensor<int32, [4]> var_1904 = const()[name = string("op_1904"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 8, 1, 128]> v = transpose(perm = v_perm_0, x = v_17)[name = string("transpose_1")];
            tensor<fp32, [1, 1024, 1, 1]> nv_flat = reshape(shape = var_1904, x = v)[name = string("nv_flat")];
            tensor<fp32, [1, 1024, 1, 16]> var_1913 = mul(x = var_1707, y = var_464)[name = string("op_1913")];
            tensor<fp32, [1, 1024, 1, 16]> var_1914 = mul(x = nk_flat, y = update_mask_1)[name = string("op_1914")];
            tensor<fp32, [1, 1024, 1, 16]> key_cache_1 = add(x = var_1913, y = var_1914)[name = string("key_cache")];
            tensor<fp32, [1, 1024, 1, 16]> var_1920 = mul(x = var_1727, y = var_464)[name = string("op_1920")];
            tensor<fp32, [1, 1024, 1, 16]> var_1921 = mul(x = nv_flat, y = update_mask_1)[name = string("op_1921")];
            tensor<fp32, [1, 1024, 1, 16]> value_cache_1 = add(x = var_1920, y = var_1921)[name = string("value_cache")];
            tensor<int32, [1]> var_1925_axes_0 = const()[name = string("op_1925_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_1925 = squeeze(axes = var_1925_axes_0, x = key_cache_1)[name = string("op_1925")];
            tensor<int32, [4]> var_1930 = const()[name = string("op_1930"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> kc_17 = reshape(shape = var_1930, x = var_1925)[name = string("kc_17")];
            tensor<int32, [1]> var_1933_axes_0 = const()[name = string("op_1933_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_1933 = squeeze(axes = var_1933_axes_0, x = value_cache_1)[name = string("op_1933")];
            tensor<int32, [4]> var_1938 = const()[name = string("op_1938"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> vc_17 = reshape(shape = var_1938, x = var_1933)[name = string("vc_17")];
            tensor<int32, [1]> var_1941_axes_0 = const()[name = string("op_1941_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_1941 = expand_dims(axes = var_1941_axes_0, x = kc_17)[name = string("op_1941")];
            tensor<int32, [5]> var_1949_reps_0 = const()[name = string("op_1949_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_1949 = tile(reps = var_1949_reps_0, x = var_1941)[name = string("op_1949")];
            tensor<int32, [4]> var_1954 = const()[name = string("op_1954"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> kc = reshape(shape = var_1954, x = var_1949)[name = string("kc")];
            tensor<int32, [1]> var_1957_axes_0 = const()[name = string("op_1957_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_1957 = expand_dims(axes = var_1957_axes_0, x = vc_17)[name = string("op_1957")];
            tensor<int32, [5]> var_1965_reps_0 = const()[name = string("op_1965_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_1965 = tile(reps = var_1965_reps_0, x = var_1957)[name = string("op_1965")];
            tensor<int32, [4]> var_1970 = const()[name = string("op_1970"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> vc = reshape(shape = var_1970, x = var_1965)[name = string("vc")];
            bool var_1972_transpose_x_0 = const()[name = string("op_1972_transpose_x_0"), val = bool(false)];
            bool var_1972_transpose_y_0 = const()[name = string("op_1972_transpose_y_0"), val = bool(false)];
            tensor<fp32, [1, 16, 1, 16]> var_1972 = matmul(transpose_x = var_1972_transpose_x_0, transpose_y = var_1972_transpose_y_0, x = q, y = kc)[name = string("op_1972")];
            fp32 _inversed_attn_weights_33_y_0 = const()[name = string("_inversed_attn_weights_33_y_0"), val = fp32(0x1.6a09e6p-4)];
            tensor<fp32, [1, 16, 1, 16]> _inversed_attn_weights_33 = mul(x = var_1972, y = _inversed_attn_weights_33_y_0)[name = string("_inversed_attn_weights_33")];
            tensor<fp32, [1, 16, 1, 16]> attn_weights_35 = add(x = _inversed_attn_weights_33, y = mask_1)[name = string("attn_weights_35")];
            int32 var_1986 = const()[name = string("op_1986"), val = int32(-1)];
            tensor<fp32, [1, 16, 1, 16]> attn_weights = softmax(axis = var_1986, x = attn_weights_35)[name = string("attn_weights")];
            bool attn_output_17_transpose_x_1 = const()[name = string("attn_output_17_transpose_x_1"), val = bool(false)];
            bool attn_output_17_transpose_y_1 = const()[name = string("attn_output_17_transpose_y_1"), val = bool(true)];
            tensor<fp32, [1, 16, 1, 128]> attn_output_17 = matmul(transpose_x = attn_output_17_transpose_x_1, transpose_y = attn_output_17_transpose_y_1, x = attn_weights, y = vc)[name = string("attn_output_17")];
            tensor<int32, [4]> var_1995_perm_0 = const()[name = string("op_1995_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1999 = const()[name = string("op_1999"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp32, [1, 1, 16, 128]> var_1995 = transpose(perm = var_1995_perm_0, x = attn_output_17)[name = string("transpose_0")];
            tensor<fp32, [1, 1, 2048]> input_43 = reshape(shape = var_1999, x = var_1995)[name = string("input_43")];
            tensor<fp32, [1, 1, 1024]> attn_output = linear(bias = linear_1_bias_0, weight = layers_4_self_attn_o_proj_weight_palettized, x = input_43)[name = string("linear_31")];
            tensor<int32, [1]> var_2005_axes_0 = const()[name = string("op_2005_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_2005 = squeeze(axes = var_2005_axes_0, x = attn_output)[name = string("op_2005")];
            tensor<int32, [1]> var_2007_axes_0 = const()[name = string("op_2007_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_2007 = squeeze(axes = var_2007_axes_0, x = var_2005)[name = string("op_2007")];
            tensor<int32, [1]> var_2009_axes_0 = const()[name = string("op_2009_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_2009 = expand_dims(axes = var_2009_axes_0, x = var_2007)[name = string("op_2009")];
            tensor<int32, [1]> attn_4d_axes_0 = const()[name = string("attn_4d_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> attn_4d = expand_dims(axes = attn_4d_axes_0, x = var_2009)[name = string("attn_4d")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_17 = add(x = hidden_15, y = attn_4d)[name = string("hidden_17")];
            tensor<int32, [1]> var_2015_axes_0 = const()[name = string("op_2015_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_2015 = squeeze(axes = var_2015_axes_0, x = hidden_17)[name = string("op_2015")];
            tensor<int32, [1]> var_2017_axes_0 = const()[name = string("op_2017_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_2017 = squeeze(axes = var_2017_axes_0, x = var_2015)[name = string("op_2017")];
            tensor<int32, [1]> hidden_states_115_axes_0 = const()[name = string("hidden_states_115_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_115 = expand_dims(axes = hidden_states_115_axes_0, x = var_2017)[name = string("hidden_states_115")];
            fp32 var_2023_promoted = const()[name = string("op_2023_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_2029 = pow(x = hidden_states_115, y = var_2023_promoted)[name = string("op_2029")];
            tensor<int32, [1]> variance_39_axes_0 = const()[name = string("variance_39_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_39_keep_dims_0 = const()[name = string("variance_39_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 1]> variance_39 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_2029)[name = string("variance_39")];
            fp32 var_2032 = const()[name = string("op_2032"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_2033 = add(x = variance_39, y = var_2032)[name = string("op_2033")];
            fp32 var_2034_epsilon_0 = const()[name = string("op_2034_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_2034 = rsqrt(epsilon = var_2034_epsilon_0, x = var_2033)[name = string("op_2034")];
            tensor<fp32, [1, 1, 1024]> hidden_states_119 = mul(x = hidden_states_115, y = var_2034)[name = string("hidden_states_119")];
            tensor<fp32, [1024]> const_50 = const()[name = string("const_50"), val = tensor<fp32, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110226432)))];
            tensor<fp32, [1, 1, 1024]> input_45 = mul(x = const_50, y = hidden_states_119)[name = string("input_45")];
            tensor<fp32, [1, 1, 3072]> input_47 = linear(bias = linear_4_bias_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_45)[name = string("linear_32")];
            tensor<fp32, [1, 1, 3072]> var_2044 = silu(x = input_47)[name = string("op_2044")];
            tensor<fp32, [1, 1, 3072]> var_2046 = linear(bias = linear_4_bias_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_45)[name = string("linear_33")];
            tensor<fp32, [1, 1, 3072]> input_49 = mul(x = var_2044, y = var_2046)[name = string("input_49")];
            tensor<fp32, [1, 1, 1024]> mlp_out = linear(bias = linear_1_bias_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_49)[name = string("linear_34")];
            tensor<int32, [1]> var_2051_axes_0 = const()[name = string("op_2051_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_2051 = squeeze(axes = var_2051_axes_0, x = mlp_out)[name = string("op_2051")];
            tensor<int32, [1]> var_2053_axes_0 = const()[name = string("op_2053_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_2053 = squeeze(axes = var_2053_axes_0, x = var_2051)[name = string("op_2053")];
            tensor<int32, [1]> var_2055_axes_0 = const()[name = string("op_2055_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_2055 = expand_dims(axes = var_2055_axes_0, x = var_2053)[name = string("op_2055")];
            tensor<int32, [1]> mlp_4d_axes_0 = const()[name = string("mlp_4d_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> mlp_4d = expand_dims(axes = mlp_4d_axes_0, x = var_2055)[name = string("mlp_4d")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_states_type_fp32 = add(x = hidden_17, y = mlp_4d)[name = string("hidden")];
            tensor<int32, [1]> var_2061_axes_0 = const()[name = string("op_2061_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_2061 = squeeze(axes = var_2061_axes_0, x = hidden_states_type_fp32)[name = string("op_2061")];
            tensor<int32, [1]> var_2063_axes_0 = const()[name = string("op_2063_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_2063 = squeeze(axes = var_2063_axes_0, x = var_2061)[name = string("op_2063")];
            tensor<int32, [1]> hidden_states_121_axes_0 = const()[name = string("hidden_states_121_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_121 = expand_dims(axes = hidden_states_121_axes_0, x = var_2063)[name = string("hidden_states_121")];
            fp32 var_2069_promoted = const()[name = string("op_2069_promoted"), val = fp32(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_2075 = pow(x = hidden_states_121, y = var_2069_promoted)[name = string("op_2075")];
            tensor<int32, [1]> variance_axes_0 = const()[name = string("variance_axes_0"), val = tensor<int32, [1]>([-1])];
            bool variance_keep_dims_0 = const()[name = string("variance_keep_dims_0"), val = bool(true)];
            tensor<fp32, [1, 1, 1]> variance = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_2075)[name = string("variance")];
            fp32 var_2078 = const()[name = string("op_2078"), val = fp32(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_2079 = add(x = variance, y = var_2078)[name = string("op_2079")];
            fp32 var_2080_epsilon_0 = const()[name = string("op_2080_epsilon_0"), val = fp32(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_2080 = rsqrt(epsilon = var_2080_epsilon_0, x = var_2079)[name = string("op_2080")];
            tensor<fp32, [1, 1, 1024]> hidden_states_1_1 = mul(x = hidden_states_121, y = var_2080)[name = string("hidden_states")];
            tensor<fp32, [1024]> const_51 = const()[name = string("const_51"), val = tensor<fp32, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110230592)))];
            tensor<fp32, [1, 1, 1024]> input = mul(x = const_51, y = hidden_states_1_1)[name = string("input")];
            tensor<fp32, [1, 1, 2048]> logits_1 = linear(bias = linear_0_bias_0, weight = lm_heads_0_weight_palettized, x = input)[name = string("linear_35")];
            tensor<int32, [1]> var_2088_axes_0 = const()[name = string("op_2088_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2088 = squeeze(axes = var_2088_axes_0, x = logits_1)[name = string("op_2088")];
            tensor<fp32, [1, 1, 2048]> logits_3 = linear(bias = linear_0_bias_0, weight = lm_heads_1_weight_palettized, x = input)[name = string("linear_36")];
            tensor<int32, [1]> var_2093_axes_0 = const()[name = string("op_2093_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2093 = squeeze(axes = var_2093_axes_0, x = logits_3)[name = string("op_2093")];
            tensor<fp32, [1, 1, 2048]> logits_5 = linear(bias = linear_0_bias_0, weight = lm_heads_2_weight_palettized, x = input)[name = string("linear_37")];
            tensor<int32, [1]> var_2098_axes_0 = const()[name = string("op_2098_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2098 = squeeze(axes = var_2098_axes_0, x = logits_5)[name = string("op_2098")];
            tensor<fp32, [1, 1, 2048]> logits_7 = linear(bias = linear_0_bias_0, weight = lm_heads_3_weight_palettized, x = input)[name = string("linear_38")];
            tensor<int32, [1]> var_2103_axes_0 = const()[name = string("op_2103_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2103 = squeeze(axes = var_2103_axes_0, x = logits_7)[name = string("op_2103")];
            tensor<fp32, [1, 1, 2048]> logits_9 = linear(bias = linear_0_bias_0, weight = lm_heads_4_weight_palettized, x = input)[name = string("linear_39")];
            tensor<int32, [1]> var_2108_axes_0 = const()[name = string("op_2108_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2108 = squeeze(axes = var_2108_axes_0, x = logits_9)[name = string("op_2108")];
            tensor<fp32, [1, 1, 2048]> logits_11 = linear(bias = linear_0_bias_0, weight = lm_heads_5_weight_palettized, x = input)[name = string("linear_40")];
            tensor<int32, [1]> var_2113_axes_0 = const()[name = string("op_2113_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2113 = squeeze(axes = var_2113_axes_0, x = logits_11)[name = string("op_2113")];
            tensor<fp32, [1, 1, 2048]> logits_13 = linear(bias = linear_0_bias_0, weight = lm_heads_6_weight_palettized, x = input)[name = string("linear_41")];
            tensor<int32, [1]> var_2118_axes_0 = const()[name = string("op_2118_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2118 = squeeze(axes = var_2118_axes_0, x = logits_13)[name = string("op_2118")];
            tensor<fp32, [1, 1, 2048]> logits_15 = linear(bias = linear_0_bias_0, weight = lm_heads_7_weight_palettized, x = input)[name = string("linear_42")];
            tensor<int32, [1]> var_2123_axes_0 = const()[name = string("op_2123_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2123 = squeeze(axes = var_2123_axes_0, x = logits_15)[name = string("op_2123")];
            tensor<fp32, [1, 1, 2048]> logits_17 = linear(bias = linear_0_bias_0, weight = lm_heads_8_weight_palettized, x = input)[name = string("linear_43")];
            tensor<int32, [1]> var_2128_axes_0 = const()[name = string("op_2128_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2128 = squeeze(axes = var_2128_axes_0, x = logits_17)[name = string("op_2128")];
            tensor<fp32, [1, 1, 2048]> logits_19 = linear(bias = linear_0_bias_0, weight = lm_heads_9_weight_palettized, x = input)[name = string("linear_44")];
            tensor<int32, [1]> var_2133_axes_0 = const()[name = string("op_2133_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2133 = squeeze(axes = var_2133_axes_0, x = logits_19)[name = string("op_2133")];
            tensor<fp32, [1, 1, 2048]> logits_21 = linear(bias = linear_0_bias_0, weight = lm_heads_10_weight_palettized, x = input)[name = string("linear_45")];
            tensor<int32, [1]> var_2138_axes_0 = const()[name = string("op_2138_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2138 = squeeze(axes = var_2138_axes_0, x = logits_21)[name = string("op_2138")];
            tensor<fp32, [1, 1, 2048]> logits_23 = linear(bias = linear_0_bias_0, weight = lm_heads_11_weight_palettized, x = input)[name = string("linear_46")];
            tensor<int32, [1]> var_2143_axes_0 = const()[name = string("op_2143_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2143 = squeeze(axes = var_2143_axes_0, x = logits_23)[name = string("op_2143")];
            tensor<fp32, [1, 1, 2048]> logits_25 = linear(bias = linear_0_bias_0, weight = lm_heads_12_weight_palettized, x = input)[name = string("linear_47")];
            tensor<int32, [1]> var_2148_axes_0 = const()[name = string("op_2148_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2148 = squeeze(axes = var_2148_axes_0, x = logits_25)[name = string("op_2148")];
            tensor<fp32, [1, 1, 2048]> logits_27 = linear(bias = linear_0_bias_0, weight = lm_heads_13_weight_palettized, x = input)[name = string("linear_48")];
            tensor<int32, [1]> var_2153_axes_0 = const()[name = string("op_2153_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2153 = squeeze(axes = var_2153_axes_0, x = logits_27)[name = string("op_2153")];
            tensor<fp32, [1, 1, 2048]> logits = linear(bias = linear_0_bias_0, weight = lm_heads_14_weight_palettized, x = input)[name = string("linear_49")];
            tensor<int32, [1]> var_2158_axes_0 = const()[name = string("op_2158_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2158 = squeeze(axes = var_2158_axes_0, x = logits)[name = string("op_2158")];
            int32 var_2161_axis_0 = const()[name = string("op_2161_axis_0"), val = int32(1)];
            tensor<fp32, [1, 15, 2048]> all_logits_type_fp32 = stack(axis = var_2161_axis_0, values = (var_2088, var_2093, var_2098, var_2103, var_2108, var_2113, var_2118, var_2123, var_2128, var_2133, var_2138, var_2143, var_2148, var_2153, var_2158))[name = string("op_2161")];
            int32 var_2163 = const()[name = string("op_2163"), val = int32(1)];
            bool new_kv_1_interleave_0 = const()[name = string("new_kv_1_interleave_0"), val = bool(false)];
            tensor<fp32, [1, 5120, 1, 1]> new_kv_1 = concat(axis = var_2163, interleave = new_kv_1_interleave_0, values = (nk_flat_1, nk_flat_3, nk_flat_5, nk_flat_7, nk_flat))[name = string("new_kv_1")];
            tensor<fp32, [1, 5120, 1, 16]> var_2172 = mul(x = cast_1, y = var_464)[name = string("op_2172")];
            tensor<fp32, [1, 5120, 1, 16]> var_2173 = mul(x = new_kv_1, y = update_mask_1)[name = string("op_2173")];
            tensor<fp32, [1, 5120, 1, 16]> new_key_cache_type_fp32 = add(x = var_2172, y = var_2173)[name = string("op_2175")];
            int32 var_2177 = const()[name = string("op_2177"), val = int32(1)];
            bool new_kv_interleave_0 = const()[name = string("new_kv_interleave_0"), val = bool(false)];
            tensor<fp32, [1, 5120, 1, 1]> new_kv = concat(axis = var_2177, interleave = new_kv_interleave_0, values = (nv_flat_1, nv_flat_3, nv_flat_5, nv_flat_7, nv_flat))[name = string("new_kv")];
            tensor<fp32, [1, 5120, 1, 16]> var_2186 = mul(x = cast_4, y = var_464)[name = string("op_2186")];
            tensor<fp32, [1, 5120, 1, 16]> var_2187 = mul(x = new_kv, y = update_mask_1)[name = string("op_2187")];
            tensor<fp32, [1, 5120, 1, 16]> new_value_cache_type_fp32 = add(x = var_2186, y = var_2187)[name = string("op_2189")];
            string cast_65_dtype_0 = const()[name = string("cast_65_dtype_0"), val = string("fp16")];
            string cast_66_dtype_0 = const()[name = string("cast_66_dtype_0"), val = string("fp16")];
            string cast_67_dtype_0 = const()[name = string("cast_67_dtype_0"), val = string("fp16")];
            string cast_68_dtype_0 = const()[name = string("cast_68_dtype_0"), val = string("fp16")];
            tensor<fp16, [1, 15, 2048]> all_logits = cast(dtype = cast_65_dtype_0, x = all_logits_type_fp32)[name = string("cast_0")];
            tensor<fp16, [1, 1024, 1, 1]> hidden_states = cast(dtype = cast_66_dtype_0, x = hidden_states_type_fp32)[name = string("cast_1")];
            tensor<fp16, [1, 5120, 1, 16]> new_key_cache = cast(dtype = cast_67_dtype_0, x = new_key_cache_type_fp32)[name = string("cast_2")];
            tensor<fp16, [1, 5120, 1, 16]> new_value_cache = cast(dtype = cast_68_dtype_0, x = new_value_cache_type_fp32)[name = string("cast_3")];
        } -> (all_logits, hidden_states, new_key_cache, new_value_cache);
}