File size: 171,195 Bytes

program(1.0)
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})]
{
    func main<ios17>(tensor<int32, [1]> cache_length, tensor<fp16, [1, 1024, 1, 1]> input_embeds, tensor<fp16, [1, 5120, 1, 16]> key_cache, tensor<fp16, [1, 16]> key_padding_mask, tensor<fp16, [1, 16]> kv_cache_update_mask, tensor<fp16, [1, 5120, 1, 16]> value_cache) {
            tensor<string, []> cast_0_dtype_0 = const()[name = tensor<string, []>("cast_0_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> cast_1_dtype_0 = const()[name = tensor<string, []>("cast_1_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> cast_2_dtype_0 = const()[name = tensor<string, []>("cast_2_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> cast_3_dtype_0 = const()[name = tensor<string, []>("cast_3_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> cast_4_dtype_0 = const()[name = tensor<string, []>("cast_4_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<fp32, [2048, 1024]> layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2097280))), name = tensor<string, []>("layers_0_self_attn_q_proj_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [1024, 1024]> layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [1048576]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2098368))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3147008))), name = tensor<string, []>("layers_0_self_attn_k_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 1024])];
            tensor<fp32, [1024, 1024]> layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [1048576]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3148096))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4196736))), name = tensor<string, []>("layers_0_self_attn_v_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 1024])];
            tensor<fp32, [1024, 2048]> layers_0_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4197824))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6295040))), name = tensor<string, []>("layers_0_self_attn_o_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 2048])];
            tensor<fp32, [3072, 1024]> layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [3145728]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6296128))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9441920))), name = tensor<string, []>("layers_0_mlp_gate_proj_weight_palettized"), shape = tensor<uint32, [2]>([3072, 1024])];
            tensor<fp32, [3072, 1024]> layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [3145728]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9443008))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12588800))), name = tensor<string, []>("layers_0_mlp_up_proj_weight_palettized"), shape = tensor<uint32, [2]>([3072, 1024])];
            tensor<fp32, [1024, 3072]> layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [3145728]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12589888))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15735680))), name = tensor<string, []>("layers_0_mlp_down_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 3072])];
            tensor<fp32, [2048, 1024]> layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15736768))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17833984))), name = tensor<string, []>("layers_1_self_attn_q_proj_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [1024, 1024]> layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [1048576]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17835072))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18883712))), name = tensor<string, []>("layers_1_self_attn_k_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 1024])];
            tensor<fp32, [1024, 1024]> layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [1048576]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18884800))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(19933440))), name = tensor<string, []>("layers_1_self_attn_v_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 1024])];
            tensor<fp32, [1024, 2048]> layers_1_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(19934528))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22031744))), name = tensor<string, []>("layers_1_self_attn_o_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 2048])];
            tensor<fp32, [3072, 1024]> layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [3145728]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22032832))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25178624))), name = tensor<string, []>("layers_1_mlp_gate_proj_weight_palettized"), shape = tensor<uint32, [2]>([3072, 1024])];
            tensor<fp32, [3072, 1024]> layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [3145728]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25179712))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28325504))), name = tensor<string, []>("layers_1_mlp_up_proj_weight_palettized"), shape = tensor<uint32, [2]>([3072, 1024])];
            tensor<fp32, [1024, 3072]> layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [3145728]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28326592))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31472384))), name = tensor<string, []>("layers_1_mlp_down_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 3072])];
            tensor<fp32, [2048, 1024]> layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31473472))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33570688))), name = tensor<string, []>("layers_2_self_attn_q_proj_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [1024, 1024]> layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [1048576]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33571776))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34620416))), name = tensor<string, []>("layers_2_self_attn_k_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 1024])];
            tensor<fp32, [1024, 1024]> layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [1048576]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34621504))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35670144))), name = tensor<string, []>("layers_2_self_attn_v_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 1024])];
            tensor<fp32, [1024, 2048]> layers_2_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35671232))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37768448))), name = tensor<string, []>("layers_2_self_attn_o_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 2048])];
            tensor<fp32, [3072, 1024]> layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [3145728]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37769536))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40915328))), name = tensor<string, []>("layers_2_mlp_gate_proj_weight_palettized"), shape = tensor<uint32, [2]>([3072, 1024])];
            tensor<fp32, [3072, 1024]> layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [3145728]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40916416))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44062208))), name = tensor<string, []>("layers_2_mlp_up_proj_weight_palettized"), shape = tensor<uint32, [2]>([3072, 1024])];
            tensor<fp32, [1024, 3072]> layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [3145728]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44063296))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47209088))), name = tensor<string, []>("layers_2_mlp_down_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 3072])];
            tensor<fp32, [2048, 1024]> layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47210176))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49307392))), name = tensor<string, []>("layers_3_self_attn_q_proj_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [1024, 1024]> layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [1048576]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49308480))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50357120))), name = tensor<string, []>("layers_3_self_attn_k_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 1024])];
            tensor<fp32, [1024, 1024]> layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [1048576]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50358208))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51406848))), name = tensor<string, []>("layers_3_self_attn_v_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 1024])];
            tensor<fp32, [1024, 2048]> layers_3_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51407936))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53505152))), name = tensor<string, []>("layers_3_self_attn_o_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 2048])];
            tensor<fp32, [3072, 1024]> layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [3145728]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53506240))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56652032))), name = tensor<string, []>("layers_3_mlp_gate_proj_weight_palettized"), shape = tensor<uint32, [2]>([3072, 1024])];
            tensor<fp32, [3072, 1024]> layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [3145728]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56653120))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59798912))), name = tensor<string, []>("layers_3_mlp_up_proj_weight_palettized"), shape = tensor<uint32, [2]>([3072, 1024])];
            tensor<fp32, [1024, 3072]> layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [3145728]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59800000))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62945792))), name = tensor<string, []>("layers_3_mlp_down_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 3072])];
            tensor<fp32, [2048, 1024]> layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62946880))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(65044096))), name = tensor<string, []>("layers_4_self_attn_q_proj_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [1024, 1024]> layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [1048576]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(65045184))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66093824))), name = tensor<string, []>("layers_4_self_attn_k_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 1024])];
            tensor<fp32, [1024, 1024]> layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [1048576]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66094912))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67143552))), name = tensor<string, []>("layers_4_self_attn_v_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 1024])];
            tensor<fp32, [1024, 2048]> layers_4_self_attn_o_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67144640))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69241856))), name = tensor<string, []>("layers_4_self_attn_o_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 2048])];
            tensor<fp32, [3072, 1024]> layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [3145728]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69242944))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(72388736))), name = tensor<string, []>("layers_4_mlp_gate_proj_weight_palettized"), shape = tensor<uint32, [2]>([3072, 1024])];
            tensor<fp32, [3072, 1024]> layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [3145728]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(72389824))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75535616))), name = tensor<string, []>("layers_4_mlp_up_proj_weight_palettized"), shape = tensor<uint32, [2]>([3072, 1024])];
            tensor<fp32, [1024, 3072]> layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [3145728]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75536704))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78682496))), name = tensor<string, []>("layers_4_mlp_down_proj_weight_palettized"), shape = tensor<uint32, [2]>([1024, 3072])];
            tensor<fp32, [2048, 1024]> lm_heads_0_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78683584))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80780800))), name = tensor<string, []>("lm_heads_0_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [2048, 1024]> lm_heads_1_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80781888))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82879104))), name = tensor<string, []>("lm_heads_1_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [2048, 1024]> lm_heads_2_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82880192))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84977408))), name = tensor<string, []>("lm_heads_2_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [2048, 1024]> lm_heads_3_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84978496))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87075712))), name = tensor<string, []>("lm_heads_3_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [2048, 1024]> lm_heads_4_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87076800))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89174016))), name = tensor<string, []>("lm_heads_4_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [2048, 1024]> lm_heads_5_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89175104))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91272320))), name = tensor<string, []>("lm_heads_5_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [2048, 1024]> lm_heads_6_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91273408))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93370624))), name = tensor<string, []>("lm_heads_6_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [2048, 1024]> lm_heads_7_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93371712))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(95468928))), name = tensor<string, []>("lm_heads_7_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [2048, 1024]> lm_heads_8_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(95470016))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(97567232))), name = tensor<string, []>("lm_heads_8_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [2048, 1024]> lm_heads_9_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(97568320))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99665536))), name = tensor<string, []>("lm_heads_9_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [2048, 1024]> lm_heads_10_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99666624))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101763840))), name = tensor<string, []>("lm_heads_10_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [2048, 1024]> lm_heads_11_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101764928))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103862144))), name = tensor<string, []>("lm_heads_11_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [2048, 1024]> lm_heads_12_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103863232))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105960448))), name = tensor<string, []>("lm_heads_12_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [2048, 1024]> lm_heads_13_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105961536))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108058752))), name = tensor<string, []>("lm_heads_13_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<fp32, [2048, 1024]> lm_heads_14_weight_palettized = constexpr_lut_to_dense()[indices = tensor<uint8, [2097152]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108059840))), lut = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110157056))), name = tensor<string, []>("lm_heads_14_weight_palettized"), shape = tensor<uint32, [2]>([2048, 1024])];
            tensor<int32, [1]> var_205_axes_0 = const()[name = tensor<string, []>("op_205_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1, 1]> var_205 = expand_dims(axes = var_205_axes_0, x = cache_length)[name = tensor<string, []>("op_205")];
            tensor<string, []> pos_dtype_0 = const()[name = tensor<string, []>("pos_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<fp32, [1, 64, 1]> const_0 = const()[name = tensor<string, []>("const_0"), val = tensor<fp32, [1, 64, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110158144)))];
            tensor<int32, [1]> var_226_axes_0 = const()[name = tensor<string, []>("op_226_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1]> pos = cast(dtype = pos_dtype_0, x = var_205)[name = tensor<string, []>("cast_9")];
            tensor<fp32, [1, 1, 1]> var_226 = expand_dims(axes = var_226_axes_0, x = pos)[name = tensor<string, []>("op_226")];
            tensor<bool, []> var_227_transpose_x_0 = const()[name = tensor<string, []>("op_227_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_227_transpose_y_0 = const()[name = tensor<string, []>("op_227_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 64, 1]> var_227 = matmul(transpose_x = var_227_transpose_x_0, transpose_y = var_227_transpose_y_0, x = const_0, y = var_226)[name = tensor<string, []>("op_227")];
            tensor<int32, [3]> freqs_perm_0 = const()[name = tensor<string, []>("freqs_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, []> var_232 = const()[name = tensor<string, []>("op_232"), val = tensor<int32, []>(-1)];
            tensor<bool, []> emb_interleave_0 = const()[name = tensor<string, []>("emb_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 1, 64]> freqs = transpose(perm = freqs_perm_0, x = var_227)[name = tensor<string, []>("transpose_10")];
            tensor<fp32, [1, 1, 128]> emb = concat(axis = var_232, interleave = emb_interleave_0, values = (freqs, freqs))[name = tensor<string, []>("emb")];
            tensor<fp32, [1, 1, 128]> var_234 = cos(x = emb)[name = tensor<string, []>("op_234")];
            tensor<fp32, [1, 1, 128]> var_242 = sin(x = emb)[name = tensor<string, []>("op_242")];
            tensor<int32, [1]> var_251_axes_0 = const()[name = tensor<string, []>("op_251_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 16]> cast_3 = cast(dtype = cast_3_dtype_0, x = kv_cache_update_mask)[name = tensor<string, []>("cast_8")];
            tensor<fp32, [1, 1, 16]> var_251 = expand_dims(axes = var_251_axes_0, x = cast_3)[name = tensor<string, []>("op_251")];
            tensor<int32, [1]> update_mask_axes_0 = const()[name = tensor<string, []>("update_mask_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1, 1, 16]> update_mask = expand_dims(axes = update_mask_axes_0, x = var_251)[name = tensor<string, []>("update_mask")];
            tensor<int32, [4]> var_263_begin_0 = const()[name = tensor<string, []>("op_263_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_263_end_0 = const()[name = tensor<string, []>("op_263_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 16])];
            tensor<bool, [4]> var_263_end_mask_0 = const()[name = tensor<string, []>("op_263_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp32, [1, 5120, 1, 16]> cast_1 = cast(dtype = cast_1_dtype_0, x = key_cache)[name = tensor<string, []>("cast_7")];
            tensor<fp32, [1, 1024, 1, 16]> var_263 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = cast_1)[name = tensor<string, []>("op_263")];
            tensor<int32, [4]> var_283_begin_0 = const()[name = tensor<string, []>("op_283_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_283_end_0 = const()[name = tensor<string, []>("op_283_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 16])];
            tensor<bool, [4]> var_283_end_mask_0 = const()[name = tensor<string, []>("op_283_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp32, [1, 5120, 1, 16]> cast_4 = cast(dtype = cast_4_dtype_0, x = value_cache)[name = tensor<string, []>("cast_6")];
            tensor<fp32, [1, 1024, 1, 16]> var_283 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = cast_4)[name = tensor<string, []>("op_283")];
            tensor<int32, [1]> var_295_axes_0 = const()[name = tensor<string, []>("op_295_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1, 1]> cast_0 = cast(dtype = cast_0_dtype_0, x = input_embeds)[name = tensor<string, []>("cast_5")];
            tensor<fp32, [1, 1024, 1]> var_295 = squeeze(axes = var_295_axes_0, x = cast_0)[name = tensor<string, []>("op_295")];
            tensor<int32, [1]> var_297_axes_0 = const()[name = tensor<string, []>("op_297_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_297 = squeeze(axes = var_297_axes_0, x = var_295)[name = tensor<string, []>("op_297")];
            tensor<int32, [1]> hidden_states_1_axes_0 = const()[name = tensor<string, []>("hidden_states_1_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_1 = expand_dims(axes = hidden_states_1_axes_0, x = var_297)[name = tensor<string, []>("hidden_states_1")];
            tensor<fp32, []> var_303_promoted = const()[name = tensor<string, []>("op_303_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_309 = pow(x = hidden_states_1, y = var_303_promoted)[name = tensor<string, []>("op_309")];
            tensor<int32, [1]> variance_1_axes_0 = const()[name = tensor<string, []>("variance_1_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_1_keep_dims_0 = const()[name = tensor<string, []>("variance_1_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, 1, 1]> variance_1 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_309)[name = tensor<string, []>("variance_1")];
            tensor<fp32, [1024]> const_1 = const()[name = tensor<string, []>("const_1"), val = tensor<fp32, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110158464)))];
            tensor<fp32, [1, 1, 1024]> var_313 = mul(x = const_1, y = hidden_states_1)[name = tensor<string, []>("op_313")];
            tensor<fp32, []> var_314 = const()[name = tensor<string, []>("op_314"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_315 = add(x = variance_1, y = var_314)[name = tensor<string, []>("op_315")];
            tensor<fp32, []> var_316_epsilon_0 = const()[name = tensor<string, []>("op_316_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_316 = rsqrt(epsilon = var_316_epsilon_0, x = var_315)[name = tensor<string, []>("op_316")];
            tensor<fp32, [1, 1, 1024]> input_1 = mul(x = var_313, y = var_316)[name = tensor<string, []>("input_1")];
            tensor<fp32, [2048]> linear_0_bias_0 = const()[name = tensor<string, []>("linear_0_bias_0"), val = tensor<fp32, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110162624)))];
            tensor<fp32, [1, 1, 2048]> var_320 = linear(bias = linear_0_bias_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = input_1)[name = tensor<string, []>("linear_0")];
            tensor<int32, [4]> var_325 = const()[name = tensor<string, []>("op_325"), val = tensor<int32, [4]>([1, 1, 16, 128])];
            tensor<fp32, [1, 1, 16, 128]> var_326 = reshape(shape = var_325, x = var_320)[name = tensor<string, []>("op_326")];
            tensor<fp32, [1024]> linear_1_bias_0 = const()[name = tensor<string, []>("linear_1_bias_0"), val = tensor<fp32, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110170880)))];
            tensor<fp32, [1, 1, 1024]> var_332 = linear(bias = linear_1_bias_0, weight = layers_0_self_attn_k_proj_weight_palettized, x = input_1)[name = tensor<string, []>("linear_1")];
            tensor<int32, [4]> var_337 = const()[name = tensor<string, []>("op_337"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<fp32, [1, 1, 8, 128]> var_338 = reshape(shape = var_337, x = var_332)[name = tensor<string, []>("op_338")];
            tensor<fp32, [1, 1, 1024]> var_344 = linear(bias = linear_1_bias_0, weight = layers_0_self_attn_v_proj_weight_palettized, x = input_1)[name = tensor<string, []>("linear_2")];
            tensor<fp32, []> var_357_promoted = const()[name = tensor<string, []>("op_357_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 16, 128]> var_363 = pow(x = var_326, y = var_357_promoted)[name = tensor<string, []>("op_363")];
            tensor<bool, []> variance_3_keep_dims_0 = const()[name = tensor<string, []>("variance_3_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<int32, [1]> const_52 = const()[name = tensor<string, []>("const_52"), val = tensor<int32, [1]>([3])];
            tensor<fp32, [1, 1, 16, 1]> variance_3 = reduce_mean(axes = const_52, keep_dims = variance_3_keep_dims_0, x = var_363)[name = tensor<string, []>("variance_3")];
            tensor<fp32, [1, 1, 1, 128]> const_53 = const()[name = tensor<string, []>("const_53"), val = tensor<fp32, [1, 1, 1, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110175040)))];
            tensor<fp32, [1, 1, 16, 128]> var_367 = mul(x = const_53, y = var_326)[name = tensor<string, []>("op_367")];
            tensor<fp32, []> var_368 = const()[name = tensor<string, []>("op_368"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 16, 1]> var_369 = add(x = variance_3, y = var_368)[name = tensor<string, []>("op_369")];
            tensor<fp32, []> var_370_epsilon_0 = const()[name = tensor<string, []>("op_370_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 16, 1]> var_370 = rsqrt(epsilon = var_370_epsilon_0, x = var_369)[name = tensor<string, []>("op_370")];
            tensor<fp32, [1, 1, 16, 128]> q_1 = mul(x = var_367, y = var_370)[name = tensor<string, []>("q_1")];
            tensor<fp32, []> var_375_promoted = const()[name = tensor<string, []>("op_375_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 8, 128]> var_381 = pow(x = var_338, y = var_375_promoted)[name = tensor<string, []>("op_381")];
            tensor<bool, []> variance_5_keep_dims_0 = const()[name = tensor<string, []>("variance_5_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<int32, [1]> const_54 = const()[name = tensor<string, []>("const_54"), val = tensor<int32, [1]>([3])];
            tensor<fp32, [1, 1, 8, 1]> variance_5 = reduce_mean(axes = const_54, keep_dims = variance_5_keep_dims_0, x = var_381)[name = tensor<string, []>("variance_5")];
            tensor<fp32, [1, 1, 1, 128]> const_55 = const()[name = tensor<string, []>("const_55"), val = tensor<fp32, [1, 1, 1, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110175616)))];
            tensor<fp32, [1, 1, 8, 128]> var_385 = mul(x = const_55, y = var_338)[name = tensor<string, []>("op_385")];
            tensor<fp32, []> var_386 = const()[name = tensor<string, []>("op_386"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 8, 1]> var_387 = add(x = variance_5, y = var_386)[name = tensor<string, []>("op_387")];
            tensor<fp32, []> var_388_epsilon_0 = const()[name = tensor<string, []>("op_388_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 8, 1]> var_388 = rsqrt(epsilon = var_388_epsilon_0, x = var_387)[name = tensor<string, []>("op_388")];
            tensor<fp32, [1, 1, 8, 128]> k_1 = mul(x = var_385, y = var_388)[name = tensor<string, []>("k_1")];
            tensor<int32, [1]> cos_r_1_axes_0 = const()[name = tensor<string, []>("cos_r_1_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 1, 1, 128]> cos_r_1 = expand_dims(axes = cos_r_1_axes_0, x = var_234)[name = tensor<string, []>("cos_r_1")];
            tensor<int32, [1]> sin_r_1_axes_0 = const()[name = tensor<string, []>("sin_r_1_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 1, 1, 128]> sin_r_1 = expand_dims(axes = sin_r_1_axes_0, x = var_242)[name = tensor<string, []>("sin_r_1")];
            tensor<fp32, [1, 1, 16, 128]> var_403 = mul(x = q_1, y = cos_r_1)[name = tensor<string, []>("op_403")];
            tensor<int32, [4]> x1_1_begin_0 = const()[name = tensor<string, []>("x1_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_1_end_0 = const()[name = tensor<string, []>("x1_1_end_0"), val = tensor<int32, [4]>([1, 1, 16, 64])];
            tensor<bool, [4]> x1_1_end_mask_0 = const()[name = tensor<string, []>("x1_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 1, 16, 64]> x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1)[name = tensor<string, []>("x1_1")];
            tensor<int32, [4]> x2_1_begin_0 = const()[name = tensor<string, []>("x2_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_1_end_0 = const()[name = tensor<string, []>("x2_1_end_0"), val = tensor<int32, [4]>([1, 1, 16, 128])];
            tensor<bool, [4]> x2_1_end_mask_0 = const()[name = tensor<string, []>("x2_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 1, 16, 64]> x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1)[name = tensor<string, []>("x2_1")];
            tensor<fp32, []> const_6_promoted = const()[name = tensor<string, []>("const_6_promoted"), val = tensor<fp32, []>(-0x1p+0)];
            tensor<fp32, [1, 1, 16, 64]> var_424 = mul(x = x2_1, y = const_6_promoted)[name = tensor<string, []>("op_424")];
            tensor<int32, []> var_426 = const()[name = tensor<string, []>("op_426"), val = tensor<int32, []>(-1)];
            tensor<bool, []> var_427_interleave_0 = const()[name = tensor<string, []>("op_427_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 1, 16, 128]> var_427 = concat(axis = var_426, interleave = var_427_interleave_0, values = (var_424, x1_1))[name = tensor<string, []>("op_427")];
            tensor<fp32, [1, 1, 16, 128]> var_428 = mul(x = var_427, y = sin_r_1)[name = tensor<string, []>("op_428")];
            tensor<fp32, [1, 1, 16, 128]> q_5 = add(x = var_403, y = var_428)[name = tensor<string, []>("q_5")];
            tensor<fp32, [1, 1, 8, 128]> var_431 = mul(x = k_1, y = cos_r_1)[name = tensor<string, []>("op_431")];
            tensor<int32, [4]> x1_3_begin_0 = const()[name = tensor<string, []>("x1_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_3_end_0 = const()[name = tensor<string, []>("x1_3_end_0"), val = tensor<int32, [4]>([1, 1, 8, 64])];
            tensor<bool, [4]> x1_3_end_mask_0 = const()[name = tensor<string, []>("x1_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 1, 8, 64]> x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1)[name = tensor<string, []>("x1_3")];
            tensor<int32, [4]> x2_3_begin_0 = const()[name = tensor<string, []>("x2_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_3_end_0 = const()[name = tensor<string, []>("x2_3_end_0"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<bool, [4]> x2_3_end_mask_0 = const()[name = tensor<string, []>("x2_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 1, 8, 64]> x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1)[name = tensor<string, []>("x2_3")];
            tensor<fp32, []> const_9_promoted = const()[name = tensor<string, []>("const_9_promoted"), val = tensor<fp32, []>(-0x1p+0)];
            tensor<fp32, [1, 1, 8, 64]> var_452 = mul(x = x2_3, y = const_9_promoted)[name = tensor<string, []>("op_452")];
            tensor<int32, []> var_454 = const()[name = tensor<string, []>("op_454"), val = tensor<int32, []>(-1)];
            tensor<bool, []> var_455_interleave_0 = const()[name = tensor<string, []>("op_455_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 1, 8, 128]> var_455 = concat(axis = var_454, interleave = var_455_interleave_0, values = (var_452, x1_3))[name = tensor<string, []>("op_455")];
            tensor<fp32, [1, 1, 8, 128]> var_456 = mul(x = var_455, y = sin_r_1)[name = tensor<string, []>("op_456")];
            tensor<fp32, [1, 1, 8, 128]> k_5 = add(x = var_431, y = var_456)[name = tensor<string, []>("k_5")];
            tensor<int32, [4]> var_463 = const()[name = tensor<string, []>("op_463"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 1024, 1, 1]> nk_1 = reshape(shape = var_463, x = k_5)[name = tensor<string, []>("nk_1")];
            tensor<int32, [4]> var_469 = const()[name = tensor<string, []>("op_469"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 1024, 1, 1]> nv_1 = reshape(shape = var_469, x = var_344)[name = tensor<string, []>("nv_1")];
            tensor<fp32, []> var_471 = const()[name = tensor<string, []>("op_471"), val = tensor<fp32, []>(0x1p+0)];
            tensor<fp32, [1, 1, 1, 16]> var_473 = sub(x = var_471, y = update_mask)[name = tensor<string, []>("op_473")];
            tensor<fp32, [1, 1024, 1, 16]> var_474 = mul(x = var_263, y = var_473)[name = tensor<string, []>("op_474")];
            tensor<fp32, [1, 1024, 1, 16]> var_475 = mul(x = nk_1, y = update_mask)[name = tensor<string, []>("op_475")];
            tensor<fp32, [1, 1024, 1, 16]> lkc_3 = add(x = var_474, y = var_475)[name = tensor<string, []>("lkc_3")];
            tensor<fp32, [1, 1024, 1, 16]> var_481 = mul(x = var_283, y = var_473)[name = tensor<string, []>("op_481")];
            tensor<fp32, [1, 1024, 1, 16]> var_482 = mul(x = nv_1, y = update_mask)[name = tensor<string, []>("op_482")];
            tensor<fp32, [1, 1024, 1, 16]> lvc_3 = add(x = var_481, y = var_482)[name = tensor<string, []>("lvc_3")];
            tensor<int32, [1]> var_486_axes_0 = const()[name = tensor<string, []>("op_486_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_486 = squeeze(axes = var_486_axes_0, x = lkc_3)[name = tensor<string, []>("op_486")];
            tensor<int32, [4]> var_491 = const()[name = tensor<string, []>("op_491"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> kc_1 = reshape(shape = var_491, x = var_486)[name = tensor<string, []>("kc_1")];
            tensor<int32, [1]> var_494_axes_0 = const()[name = tensor<string, []>("op_494_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_494 = squeeze(axes = var_494_axes_0, x = lvc_3)[name = tensor<string, []>("op_494")];
            tensor<int32, [4]> var_499 = const()[name = tensor<string, []>("op_499"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> vc_1 = reshape(shape = var_499, x = var_494)[name = tensor<string, []>("vc_1")];
            tensor<int32, [1]> var_502_axes_0 = const()[name = tensor<string, []>("op_502_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_502 = expand_dims(axes = var_502_axes_0, x = kc_1)[name = tensor<string, []>("op_502")];
            tensor<int32, [5]> var_510_reps_0 = const()[name = tensor<string, []>("op_510_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_510 = tile(reps = var_510_reps_0, x = var_502)[name = tensor<string, []>("op_510")];
            tensor<int32, [4]> var_515 = const()[name = tensor<string, []>("op_515"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> kc_3 = reshape(shape = var_515, x = var_510)[name = tensor<string, []>("kc_3")];
            tensor<int32, [1]> var_518_axes_0 = const()[name = tensor<string, []>("op_518_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_518 = expand_dims(axes = var_518_axes_0, x = vc_1)[name = tensor<string, []>("op_518")];
            tensor<int32, [5]> var_526_reps_0 = const()[name = tensor<string, []>("op_526_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_526 = tile(reps = var_526_reps_0, x = var_518)[name = tensor<string, []>("op_526")];
            tensor<int32, [4]> var_531 = const()[name = tensor<string, []>("op_531"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> vc_3 = reshape(shape = var_531, x = var_526)[name = tensor<string, []>("vc_3")];
            tensor<int32, [4]> var_535_perm_0 = const()[name = tensor<string, []>("op_535_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_536_transpose_x_0 = const()[name = tensor<string, []>("op_536_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_536_transpose_y_0 = const()[name = tensor<string, []>("op_536_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 16, 1, 128]> var_535 = transpose(perm = var_535_perm_0, x = q_5)[name = tensor<string, []>("transpose_9")];
            tensor<fp32, [1, 16, 1, 16]> var_536 = matmul(transpose_x = var_536_transpose_x_0, transpose_y = var_536_transpose_y_0, x = var_535, y = kc_3)[name = tensor<string, []>("op_536")];
            tensor<fp32, []> _inversed_aw_1_y_0 = const()[name = tensor<string, []>("_inversed_aw_1_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-4)];
            tensor<fp32, [1, 16, 1, 16]> _inversed_aw_1 = mul(x = var_536, y = _inversed_aw_1_y_0)[name = tensor<string, []>("_inversed_aw_1")];
            tensor<int32, [1]> var_540_axes_0 = const()[name = tensor<string, []>("op_540_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 16]> cast_2 = cast(dtype = cast_2_dtype_0, x = key_padding_mask)[name = tensor<string, []>("cast_4")];
            tensor<fp32, [1, 1, 16]> var_540 = expand_dims(axes = var_540_axes_0, x = cast_2)[name = tensor<string, []>("op_540")];
            tensor<int32, [1]> var_542_axes_0 = const()[name = tensor<string, []>("op_542_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1, 1, 16]> var_542 = expand_dims(axes = var_542_axes_0, x = var_540)[name = tensor<string, []>("op_542")];
            tensor<fp32, [1, 16, 1, 16]> aw_3 = add(x = _inversed_aw_1, y = var_542)[name = tensor<string, []>("aw_3")];
            tensor<int32, []> var_550 = const()[name = tensor<string, []>("op_550"), val = tensor<int32, []>(-1)];
            tensor<fp32, [1, 16, 1, 16]> aw_7 = softmax(axis = var_550, x = aw_3)[name = tensor<string, []>("aw_7")];
            tensor<bool, []> var_556_transpose_x_1 = const()[name = tensor<string, []>("op_556_transpose_x_1"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_556_transpose_y_1 = const()[name = tensor<string, []>("op_556_transpose_y_1"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, 16, 1, 128]> var_556 = matmul(transpose_x = var_556_transpose_x_1, transpose_y = var_556_transpose_y_1, x = aw_7, y = vc_3)[name = tensor<string, []>("op_556")];
            tensor<int32, [4]> var_559_perm_0 = const()[name = tensor<string, []>("op_559_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_563 = const()[name = tensor<string, []>("op_563"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp32, [1, 1, 16, 128]> var_559 = transpose(perm = var_559_perm_0, x = var_556)[name = tensor<string, []>("transpose_8")];
            tensor<fp32, [1, 1, 2048]> input_3 = reshape(shape = var_563, x = var_559)[name = tensor<string, []>("input_3")];
            tensor<fp32, [1, 1, 1024]> var_567 = linear(bias = linear_1_bias_0, weight = layers_0_self_attn_o_proj_weight_palettized, x = input_3)[name = tensor<string, []>("linear_3")];
            tensor<int32, [1]> var_569_axes_0 = const()[name = tensor<string, []>("op_569_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_569 = squeeze(axes = var_569_axes_0, x = var_567)[name = tensor<string, []>("op_569")];
            tensor<int32, [1]> var_571_axes_0 = const()[name = tensor<string, []>("op_571_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_571 = squeeze(axes = var_571_axes_0, x = var_569)[name = tensor<string, []>("op_571")];
            tensor<int32, [1]> var_573_axes_0 = const()[name = tensor<string, []>("op_573_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_573 = expand_dims(axes = var_573_axes_0, x = var_571)[name = tensor<string, []>("op_573")];
            tensor<int32, [1]> ao_1_axes_0 = const()[name = tensor<string, []>("ao_1_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> ao_1 = expand_dims(axes = ao_1_axes_0, x = var_573)[name = tensor<string, []>("ao_1")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_1 = add(x = cast_0, y = ao_1)[name = tensor<string, []>("hidden_1")];
            tensor<int32, [1]> var_579_axes_0 = const()[name = tensor<string, []>("op_579_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_579 = squeeze(axes = var_579_axes_0, x = hidden_1)[name = tensor<string, []>("op_579")];
            tensor<int32, [1]> var_581_axes_0 = const()[name = tensor<string, []>("op_581_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_581 = squeeze(axes = var_581_axes_0, x = var_579)[name = tensor<string, []>("op_581")];
            tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = tensor<string, []>("hidden_states_13_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_13 = expand_dims(axes = hidden_states_13_axes_0, x = var_581)[name = tensor<string, []>("hidden_states_13")];
            tensor<fp32, []> var_587_promoted = const()[name = tensor<string, []>("op_587_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_593 = pow(x = hidden_states_13, y = var_587_promoted)[name = tensor<string, []>("op_593")];
            tensor<int32, [1]> variance_7_axes_0 = const()[name = tensor<string, []>("variance_7_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_7_keep_dims_0 = const()[name = tensor<string, []>("variance_7_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, 1, 1]> variance_7 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_593)[name = tensor<string, []>("variance_7")];
            tensor<fp32, [1024]> const_10 = const()[name = tensor<string, []>("const_10"), val = tensor<fp32, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110176192)))];
            tensor<fp32, [1, 1, 1024]> var_597 = mul(x = const_10, y = hidden_states_13)[name = tensor<string, []>("op_597")];
            tensor<fp32, []> var_598 = const()[name = tensor<string, []>("op_598"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_599 = add(x = variance_7, y = var_598)[name = tensor<string, []>("op_599")];
            tensor<fp32, []> var_600_epsilon_0 = const()[name = tensor<string, []>("op_600_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_600 = rsqrt(epsilon = var_600_epsilon_0, x = var_599)[name = tensor<string, []>("op_600")];
            tensor<fp32, [1, 1, 1024]> input_5 = mul(x = var_597, y = var_600)[name = tensor<string, []>("input_5")];
            tensor<fp32, [3072]> linear_4_bias_0 = const()[name = tensor<string, []>("linear_4_bias_0"), val = tensor<fp32, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110180352)))];
            tensor<fp32, [1, 1, 3072]> input_7 = linear(bias = linear_4_bias_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_5)[name = tensor<string, []>("linear_4")];
            tensor<fp32, [1, 1, 3072]> var_608 = silu(x = input_7)[name = tensor<string, []>("op_608")];
            tensor<fp32, [1, 1, 3072]> var_610 = linear(bias = linear_4_bias_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_5)[name = tensor<string, []>("linear_5")];
            tensor<fp32, [1, 1, 3072]> input_9 = mul(x = var_608, y = var_610)[name = tensor<string, []>("input_9")];
            tensor<fp32, [1, 1, 1024]> var_613 = linear(bias = linear_1_bias_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_9)[name = tensor<string, []>("linear_6")];
            tensor<int32, [1]> var_615_axes_0 = const()[name = tensor<string, []>("op_615_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_615 = squeeze(axes = var_615_axes_0, x = var_613)[name = tensor<string, []>("op_615")];
            tensor<int32, [1]> var_617_axes_0 = const()[name = tensor<string, []>("op_617_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_617 = squeeze(axes = var_617_axes_0, x = var_615)[name = tensor<string, []>("op_617")];
            tensor<int32, [1]> var_619_axes_0 = const()[name = tensor<string, []>("op_619_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_619 = expand_dims(axes = var_619_axes_0, x = var_617)[name = tensor<string, []>("op_619")];
            tensor<int32, [1]> h_1_axes_0 = const()[name = tensor<string, []>("h_1_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> h_1 = expand_dims(axes = h_1_axes_0, x = var_619)[name = tensor<string, []>("h_1")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_3 = add(x = hidden_1, y = h_1)[name = tensor<string, []>("hidden_3")];
            tensor<int32, [4]> var_633_begin_0 = const()[name = tensor<string, []>("op_633_begin_0"), val = tensor<int32, [4]>([0, 1024, 0, 0])];
            tensor<int32, [4]> var_633_end_0 = const()[name = tensor<string, []>("op_633_end_0"), val = tensor<int32, [4]>([1, 2048, 1, 16])];
            tensor<bool, [4]> var_633_end_mask_0 = const()[name = tensor<string, []>("op_633_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp32, [1, 1024, 1, 16]> var_633 = slice_by_index(begin = var_633_begin_0, end = var_633_end_0, end_mask = var_633_end_mask_0, x = cast_1)[name = tensor<string, []>("op_633")];
            tensor<int32, [4]> var_653_begin_0 = const()[name = tensor<string, []>("op_653_begin_0"), val = tensor<int32, [4]>([0, 1024, 0, 0])];
            tensor<int32, [4]> var_653_end_0 = const()[name = tensor<string, []>("op_653_end_0"), val = tensor<int32, [4]>([1, 2048, 1, 16])];
            tensor<bool, [4]> var_653_end_mask_0 = const()[name = tensor<string, []>("op_653_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp32, [1, 1024, 1, 16]> var_653 = slice_by_index(begin = var_653_begin_0, end = var_653_end_0, end_mask = var_653_end_mask_0, x = cast_4)[name = tensor<string, []>("op_653")];
            tensor<int32, [1]> var_665_axes_0 = const()[name = tensor<string, []>("op_665_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_665 = squeeze(axes = var_665_axes_0, x = hidden_3)[name = tensor<string, []>("op_665")];
            tensor<int32, [1]> var_667_axes_0 = const()[name = tensor<string, []>("op_667_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_667 = squeeze(axes = var_667_axes_0, x = var_665)[name = tensor<string, []>("op_667")];
            tensor<int32, [1]> hidden_states_17_axes_0 = const()[name = tensor<string, []>("hidden_states_17_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_17 = expand_dims(axes = hidden_states_17_axes_0, x = var_667)[name = tensor<string, []>("hidden_states_17")];
            tensor<fp32, []> var_673_promoted = const()[name = tensor<string, []>("op_673_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_679 = pow(x = hidden_states_17, y = var_673_promoted)[name = tensor<string, []>("op_679")];
            tensor<int32, [1]> variance_9_axes_0 = const()[name = tensor<string, []>("variance_9_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_9_keep_dims_0 = const()[name = tensor<string, []>("variance_9_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, 1, 1]> variance_9 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_679)[name = tensor<string, []>("variance_9")];
            tensor<fp32, [1024]> const_11 = const()[name = tensor<string, []>("const_11"), val = tensor<fp32, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110192704)))];
            tensor<fp32, [1, 1, 1024]> var_683 = mul(x = const_11, y = hidden_states_17)[name = tensor<string, []>("op_683")];
            tensor<fp32, []> var_684 = const()[name = tensor<string, []>("op_684"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_685 = add(x = variance_9, y = var_684)[name = tensor<string, []>("op_685")];
            tensor<fp32, []> var_686_epsilon_0 = const()[name = tensor<string, []>("op_686_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_686 = rsqrt(epsilon = var_686_epsilon_0, x = var_685)[name = tensor<string, []>("op_686")];
            tensor<fp32, [1, 1, 1024]> input_11 = mul(x = var_683, y = var_686)[name = tensor<string, []>("input_11")];
            tensor<fp32, [1, 1, 2048]> var_690 = linear(bias = linear_0_bias_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = input_11)[name = tensor<string, []>("linear_7")];
            tensor<int32, [4]> var_695 = const()[name = tensor<string, []>("op_695"), val = tensor<int32, [4]>([1, 1, 16, 128])];
            tensor<fp32, [1, 1, 16, 128]> var_696 = reshape(shape = var_695, x = var_690)[name = tensor<string, []>("op_696")];
            tensor<fp32, [1, 1, 1024]> var_702 = linear(bias = linear_1_bias_0, weight = layers_1_self_attn_k_proj_weight_palettized, x = input_11)[name = tensor<string, []>("linear_8")];
            tensor<int32, [4]> var_707 = const()[name = tensor<string, []>("op_707"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<fp32, [1, 1, 8, 128]> var_708 = reshape(shape = var_707, x = var_702)[name = tensor<string, []>("op_708")];
            tensor<fp32, [1, 1, 1024]> var_714 = linear(bias = linear_1_bias_0, weight = layers_1_self_attn_v_proj_weight_palettized, x = input_11)[name = tensor<string, []>("linear_9")];
            tensor<fp32, []> var_727_promoted = const()[name = tensor<string, []>("op_727_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 16, 128]> var_733 = pow(x = var_696, y = var_727_promoted)[name = tensor<string, []>("op_733")];
            tensor<bool, []> variance_11_keep_dims_0 = const()[name = tensor<string, []>("variance_11_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<int32, [1]> const_56 = const()[name = tensor<string, []>("const_56"), val = tensor<int32, [1]>([3])];
            tensor<fp32, [1, 1, 16, 1]> variance_11 = reduce_mean(axes = const_56, keep_dims = variance_11_keep_dims_0, x = var_733)[name = tensor<string, []>("variance_11")];
            tensor<fp32, [1, 1, 1, 128]> const_57 = const()[name = tensor<string, []>("const_57"), val = tensor<fp32, [1, 1, 1, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110196864)))];
            tensor<fp32, [1, 1, 16, 128]> var_737 = mul(x = const_57, y = var_696)[name = tensor<string, []>("op_737")];
            tensor<fp32, []> var_738 = const()[name = tensor<string, []>("op_738"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 16, 1]> var_739 = add(x = variance_11, y = var_738)[name = tensor<string, []>("op_739")];
            tensor<fp32, []> var_740_epsilon_0 = const()[name = tensor<string, []>("op_740_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 16, 1]> var_740 = rsqrt(epsilon = var_740_epsilon_0, x = var_739)[name = tensor<string, []>("op_740")];
            tensor<fp32, [1, 1, 16, 128]> q_7 = mul(x = var_737, y = var_740)[name = tensor<string, []>("q_7")];
            tensor<fp32, []> var_745_promoted = const()[name = tensor<string, []>("op_745_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 8, 128]> var_751 = pow(x = var_708, y = var_745_promoted)[name = tensor<string, []>("op_751")];
            tensor<bool, []> variance_13_keep_dims_0 = const()[name = tensor<string, []>("variance_13_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<int32, [1]> const_58 = const()[name = tensor<string, []>("const_58"), val = tensor<int32, [1]>([3])];
            tensor<fp32, [1, 1, 8, 1]> variance_13 = reduce_mean(axes = const_58, keep_dims = variance_13_keep_dims_0, x = var_751)[name = tensor<string, []>("variance_13")];
            tensor<fp32, [1, 1, 1, 128]> const_59 = const()[name = tensor<string, []>("const_59"), val = tensor<fp32, [1, 1, 1, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110197440)))];
            tensor<fp32, [1, 1, 8, 128]> var_755 = mul(x = const_59, y = var_708)[name = tensor<string, []>("op_755")];
            tensor<fp32, []> var_756 = const()[name = tensor<string, []>("op_756"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 8, 1]> var_757 = add(x = variance_13, y = var_756)[name = tensor<string, []>("op_757")];
            tensor<fp32, []> var_758_epsilon_0 = const()[name = tensor<string, []>("op_758_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 8, 1]> var_758 = rsqrt(epsilon = var_758_epsilon_0, x = var_757)[name = tensor<string, []>("op_758")];
            tensor<fp32, [1, 1, 8, 128]> k_7 = mul(x = var_755, y = var_758)[name = tensor<string, []>("k_7")];
            tensor<fp32, [1, 1, 16, 128]> var_773 = mul(x = q_7, y = cos_r_1)[name = tensor<string, []>("op_773")];
            tensor<int32, [4]> x1_5_begin_0 = const()[name = tensor<string, []>("x1_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_5_end_0 = const()[name = tensor<string, []>("x1_5_end_0"), val = tensor<int32, [4]>([1, 1, 16, 64])];
            tensor<bool, [4]> x1_5_end_mask_0 = const()[name = tensor<string, []>("x1_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 1, 16, 64]> x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_7)[name = tensor<string, []>("x1_5")];
            tensor<int32, [4]> x2_5_begin_0 = const()[name = tensor<string, []>("x2_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_5_end_0 = const()[name = tensor<string, []>("x2_5_end_0"), val = tensor<int32, [4]>([1, 1, 16, 128])];
            tensor<bool, [4]> x2_5_end_mask_0 = const()[name = tensor<string, []>("x2_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 1, 16, 64]> x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_7)[name = tensor<string, []>("x2_5")];
            tensor<fp32, []> const_16_promoted = const()[name = tensor<string, []>("const_16_promoted"), val = tensor<fp32, []>(-0x1p+0)];
            tensor<fp32, [1, 1, 16, 64]> var_794 = mul(x = x2_5, y = const_16_promoted)[name = tensor<string, []>("op_794")];
            tensor<int32, []> var_796 = const()[name = tensor<string, []>("op_796"), val = tensor<int32, []>(-1)];
            tensor<bool, []> var_797_interleave_0 = const()[name = tensor<string, []>("op_797_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 1, 16, 128]> var_797 = concat(axis = var_796, interleave = var_797_interleave_0, values = (var_794, x1_5))[name = tensor<string, []>("op_797")];
            tensor<fp32, [1, 1, 16, 128]> var_798 = mul(x = var_797, y = sin_r_1)[name = tensor<string, []>("op_798")];
            tensor<fp32, [1, 1, 16, 128]> q_11 = add(x = var_773, y = var_798)[name = tensor<string, []>("q_11")];
            tensor<fp32, [1, 1, 8, 128]> var_801 = mul(x = k_7, y = cos_r_1)[name = tensor<string, []>("op_801")];
            tensor<int32, [4]> x1_7_begin_0 = const()[name = tensor<string, []>("x1_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_7_end_0 = const()[name = tensor<string, []>("x1_7_end_0"), val = tensor<int32, [4]>([1, 1, 8, 64])];
            tensor<bool, [4]> x1_7_end_mask_0 = const()[name = tensor<string, []>("x1_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 1, 8, 64]> x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_7)[name = tensor<string, []>("x1_7")];
            tensor<int32, [4]> x2_7_begin_0 = const()[name = tensor<string, []>("x2_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_7_end_0 = const()[name = tensor<string, []>("x2_7_end_0"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<bool, [4]> x2_7_end_mask_0 = const()[name = tensor<string, []>("x2_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 1, 8, 64]> x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_7)[name = tensor<string, []>("x2_7")];
            tensor<fp32, []> const_19_promoted = const()[name = tensor<string, []>("const_19_promoted"), val = tensor<fp32, []>(-0x1p+0)];
            tensor<fp32, [1, 1, 8, 64]> var_822 = mul(x = x2_7, y = const_19_promoted)[name = tensor<string, []>("op_822")];
            tensor<int32, []> var_824 = const()[name = tensor<string, []>("op_824"), val = tensor<int32, []>(-1)];
            tensor<bool, []> var_825_interleave_0 = const()[name = tensor<string, []>("op_825_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 1, 8, 128]> var_825 = concat(axis = var_824, interleave = var_825_interleave_0, values = (var_822, x1_7))[name = tensor<string, []>("op_825")];
            tensor<fp32, [1, 1, 8, 128]> var_826 = mul(x = var_825, y = sin_r_1)[name = tensor<string, []>("op_826")];
            tensor<fp32, [1, 1, 8, 128]> k_11 = add(x = var_801, y = var_826)[name = tensor<string, []>("k_11")];
            tensor<int32, [4]> var_833 = const()[name = tensor<string, []>("op_833"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 1024, 1, 1]> nk_3 = reshape(shape = var_833, x = k_11)[name = tensor<string, []>("nk_3")];
            tensor<int32, [4]> var_839 = const()[name = tensor<string, []>("op_839"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 1024, 1, 1]> nv_3 = reshape(shape = var_839, x = var_714)[name = tensor<string, []>("nv_3")];
            tensor<fp32, [1, 1024, 1, 16]> var_844 = mul(x = var_633, y = var_473)[name = tensor<string, []>("op_844")];
            tensor<fp32, [1, 1024, 1, 16]> var_845 = mul(x = nk_3, y = update_mask)[name = tensor<string, []>("op_845")];
            tensor<fp32, [1, 1024, 1, 16]> lkc_7 = add(x = var_844, y = var_845)[name = tensor<string, []>("lkc_7")];
            tensor<fp32, [1, 1024, 1, 16]> var_851 = mul(x = var_653, y = var_473)[name = tensor<string, []>("op_851")];
            tensor<fp32, [1, 1024, 1, 16]> var_852 = mul(x = nv_3, y = update_mask)[name = tensor<string, []>("op_852")];
            tensor<fp32, [1, 1024, 1, 16]> lvc_7 = add(x = var_851, y = var_852)[name = tensor<string, []>("lvc_7")];
            tensor<int32, [1]> var_856_axes_0 = const()[name = tensor<string, []>("op_856_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_856 = squeeze(axes = var_856_axes_0, x = lkc_7)[name = tensor<string, []>("op_856")];
            tensor<int32, [4]> var_861 = const()[name = tensor<string, []>("op_861"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> kc_5 = reshape(shape = var_861, x = var_856)[name = tensor<string, []>("kc_5")];
            tensor<int32, [1]> var_864_axes_0 = const()[name = tensor<string, []>("op_864_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_864 = squeeze(axes = var_864_axes_0, x = lvc_7)[name = tensor<string, []>("op_864")];
            tensor<int32, [4]> var_869 = const()[name = tensor<string, []>("op_869"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> vc_5 = reshape(shape = var_869, x = var_864)[name = tensor<string, []>("vc_5")];
            tensor<int32, [1]> var_872_axes_0 = const()[name = tensor<string, []>("op_872_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_872 = expand_dims(axes = var_872_axes_0, x = kc_5)[name = tensor<string, []>("op_872")];
            tensor<int32, [5]> var_880_reps_0 = const()[name = tensor<string, []>("op_880_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_880 = tile(reps = var_880_reps_0, x = var_872)[name = tensor<string, []>("op_880")];
            tensor<int32, [4]> var_885 = const()[name = tensor<string, []>("op_885"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> kc_7 = reshape(shape = var_885, x = var_880)[name = tensor<string, []>("kc_7")];
            tensor<int32, [1]> var_888_axes_0 = const()[name = tensor<string, []>("op_888_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_888 = expand_dims(axes = var_888_axes_0, x = vc_5)[name = tensor<string, []>("op_888")];
            tensor<int32, [5]> var_896_reps_0 = const()[name = tensor<string, []>("op_896_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_896 = tile(reps = var_896_reps_0, x = var_888)[name = tensor<string, []>("op_896")];
            tensor<int32, [4]> var_901 = const()[name = tensor<string, []>("op_901"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> vc_7 = reshape(shape = var_901, x = var_896)[name = tensor<string, []>("vc_7")];
            tensor<int32, [4]> var_905_perm_0 = const()[name = tensor<string, []>("op_905_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_906_transpose_x_0 = const()[name = tensor<string, []>("op_906_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_906_transpose_y_0 = const()[name = tensor<string, []>("op_906_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 16, 1, 128]> var_905 = transpose(perm = var_905_perm_0, x = q_11)[name = tensor<string, []>("transpose_7")];
            tensor<fp32, [1, 16, 1, 16]> var_906 = matmul(transpose_x = var_906_transpose_x_0, transpose_y = var_906_transpose_y_0, x = var_905, y = kc_7)[name = tensor<string, []>("op_906")];
            tensor<fp32, []> _inversed_aw_9_y_0 = const()[name = tensor<string, []>("_inversed_aw_9_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-4)];
            tensor<fp32, [1, 16, 1, 16]> _inversed_aw_9 = mul(x = var_906, y = _inversed_aw_9_y_0)[name = tensor<string, []>("_inversed_aw_9")];
            tensor<fp32, [1, 16, 1, 16]> aw_11 = add(x = _inversed_aw_9, y = var_542)[name = tensor<string, []>("aw_11")];
            tensor<int32, []> var_920 = const()[name = tensor<string, []>("op_920"), val = tensor<int32, []>(-1)];
            tensor<fp32, [1, 16, 1, 16]> aw_15 = softmax(axis = var_920, x = aw_11)[name = tensor<string, []>("aw_15")];
            tensor<bool, []> var_926_transpose_x_1 = const()[name = tensor<string, []>("op_926_transpose_x_1"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_926_transpose_y_1 = const()[name = tensor<string, []>("op_926_transpose_y_1"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, 16, 1, 128]> var_926 = matmul(transpose_x = var_926_transpose_x_1, transpose_y = var_926_transpose_y_1, x = aw_15, y = vc_7)[name = tensor<string, []>("op_926")];
            tensor<int32, [4]> var_929_perm_0 = const()[name = tensor<string, []>("op_929_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_933 = const()[name = tensor<string, []>("op_933"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp32, [1, 1, 16, 128]> var_929 = transpose(perm = var_929_perm_0, x = var_926)[name = tensor<string, []>("transpose_6")];
            tensor<fp32, [1, 1, 2048]> input_13 = reshape(shape = var_933, x = var_929)[name = tensor<string, []>("input_13")];
            tensor<fp32, [1, 1, 1024]> var_937 = linear(bias = linear_1_bias_0, weight = layers_1_self_attn_o_proj_weight_palettized, x = input_13)[name = tensor<string, []>("linear_10")];
            tensor<int32, [1]> var_939_axes_0 = const()[name = tensor<string, []>("op_939_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_939 = squeeze(axes = var_939_axes_0, x = var_937)[name = tensor<string, []>("op_939")];
            tensor<int32, [1]> var_941_axes_0 = const()[name = tensor<string, []>("op_941_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_941 = squeeze(axes = var_941_axes_0, x = var_939)[name = tensor<string, []>("op_941")];
            tensor<int32, [1]> var_943_axes_0 = const()[name = tensor<string, []>("op_943_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_943 = expand_dims(axes = var_943_axes_0, x = var_941)[name = tensor<string, []>("op_943")];
            tensor<int32, [1]> ao_3_axes_0 = const()[name = tensor<string, []>("ao_3_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> ao_3 = expand_dims(axes = ao_3_axes_0, x = var_943)[name = tensor<string, []>("ao_3")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_5 = add(x = hidden_3, y = ao_3)[name = tensor<string, []>("hidden_5")];
            tensor<int32, [1]> var_949_axes_0 = const()[name = tensor<string, []>("op_949_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_949 = squeeze(axes = var_949_axes_0, x = hidden_5)[name = tensor<string, []>("op_949")];
            tensor<int32, [1]> var_951_axes_0 = const()[name = tensor<string, []>("op_951_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_951 = squeeze(axes = var_951_axes_0, x = var_949)[name = tensor<string, []>("op_951")];
            tensor<int32, [1]> hidden_states_29_axes_0 = const()[name = tensor<string, []>("hidden_states_29_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_29 = expand_dims(axes = hidden_states_29_axes_0, x = var_951)[name = tensor<string, []>("hidden_states_29")];
            tensor<fp32, []> var_957_promoted = const()[name = tensor<string, []>("op_957_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_963 = pow(x = hidden_states_29, y = var_957_promoted)[name = tensor<string, []>("op_963")];
            tensor<int32, [1]> variance_15_axes_0 = const()[name = tensor<string, []>("variance_15_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_15_keep_dims_0 = const()[name = tensor<string, []>("variance_15_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, 1, 1]> variance_15 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_963)[name = tensor<string, []>("variance_15")];
            tensor<fp32, [1024]> const_20 = const()[name = tensor<string, []>("const_20"), val = tensor<fp32, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110198016)))];
            tensor<fp32, [1, 1, 1024]> var_967 = mul(x = const_20, y = hidden_states_29)[name = tensor<string, []>("op_967")];
            tensor<fp32, []> var_968 = const()[name = tensor<string, []>("op_968"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_969 = add(x = variance_15, y = var_968)[name = tensor<string, []>("op_969")];
            tensor<fp32, []> var_970_epsilon_0 = const()[name = tensor<string, []>("op_970_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_970 = rsqrt(epsilon = var_970_epsilon_0, x = var_969)[name = tensor<string, []>("op_970")];
            tensor<fp32, [1, 1, 1024]> input_15 = mul(x = var_967, y = var_970)[name = tensor<string, []>("input_15")];
            tensor<fp32, [1, 1, 3072]> input_17 = linear(bias = linear_4_bias_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_15)[name = tensor<string, []>("linear_11")];
            tensor<fp32, [1, 1, 3072]> var_978 = silu(x = input_17)[name = tensor<string, []>("op_978")];
            tensor<fp32, [1, 1, 3072]> var_980 = linear(bias = linear_4_bias_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_15)[name = tensor<string, []>("linear_12")];
            tensor<fp32, [1, 1, 3072]> input_19 = mul(x = var_978, y = var_980)[name = tensor<string, []>("input_19")];
            tensor<fp32, [1, 1, 1024]> var_983 = linear(bias = linear_1_bias_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_19)[name = tensor<string, []>("linear_13")];
            tensor<int32, [1]> var_985_axes_0 = const()[name = tensor<string, []>("op_985_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_985 = squeeze(axes = var_985_axes_0, x = var_983)[name = tensor<string, []>("op_985")];
            tensor<int32, [1]> var_987_axes_0 = const()[name = tensor<string, []>("op_987_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_987 = squeeze(axes = var_987_axes_0, x = var_985)[name = tensor<string, []>("op_987")];
            tensor<int32, [1]> var_989_axes_0 = const()[name = tensor<string, []>("op_989_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_989 = expand_dims(axes = var_989_axes_0, x = var_987)[name = tensor<string, []>("op_989")];
            tensor<int32, [1]> h_3_axes_0 = const()[name = tensor<string, []>("h_3_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> h_3 = expand_dims(axes = h_3_axes_0, x = var_989)[name = tensor<string, []>("h_3")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_7 = add(x = hidden_5, y = h_3)[name = tensor<string, []>("hidden_7")];
            tensor<int32, [4]> var_1003_begin_0 = const()[name = tensor<string, []>("op_1003_begin_0"), val = tensor<int32, [4]>([0, 2048, 0, 0])];
            tensor<int32, [4]> var_1003_end_0 = const()[name = tensor<string, []>("op_1003_end_0"), val = tensor<int32, [4]>([1, 3072, 1, 16])];
            tensor<bool, [4]> var_1003_end_mask_0 = const()[name = tensor<string, []>("op_1003_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp32, [1, 1024, 1, 16]> var_1003 = slice_by_index(begin = var_1003_begin_0, end = var_1003_end_0, end_mask = var_1003_end_mask_0, x = cast_1)[name = tensor<string, []>("op_1003")];
            tensor<int32, [4]> var_1023_begin_0 = const()[name = tensor<string, []>("op_1023_begin_0"), val = tensor<int32, [4]>([0, 2048, 0, 0])];
            tensor<int32, [4]> var_1023_end_0 = const()[name = tensor<string, []>("op_1023_end_0"), val = tensor<int32, [4]>([1, 3072, 1, 16])];
            tensor<bool, [4]> var_1023_end_mask_0 = const()[name = tensor<string, []>("op_1023_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp32, [1, 1024, 1, 16]> var_1023 = slice_by_index(begin = var_1023_begin_0, end = var_1023_end_0, end_mask = var_1023_end_mask_0, x = cast_4)[name = tensor<string, []>("op_1023")];
            tensor<int32, [1]> var_1035_axes_0 = const()[name = tensor<string, []>("op_1035_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_1035 = squeeze(axes = var_1035_axes_0, x = hidden_7)[name = tensor<string, []>("op_1035")];
            tensor<int32, [1]> var_1037_axes_0 = const()[name = tensor<string, []>("op_1037_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_1037 = squeeze(axes = var_1037_axes_0, x = var_1035)[name = tensor<string, []>("op_1037")];
            tensor<int32, [1]> hidden_states_33_axes_0 = const()[name = tensor<string, []>("hidden_states_33_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_33 = expand_dims(axes = hidden_states_33_axes_0, x = var_1037)[name = tensor<string, []>("hidden_states_33")];
            tensor<fp32, []> var_1043_promoted = const()[name = tensor<string, []>("op_1043_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_1049 = pow(x = hidden_states_33, y = var_1043_promoted)[name = tensor<string, []>("op_1049")];
            tensor<int32, [1]> variance_17_axes_0 = const()[name = tensor<string, []>("variance_17_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_17_keep_dims_0 = const()[name = tensor<string, []>("variance_17_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, 1, 1]> variance_17 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_1049)[name = tensor<string, []>("variance_17")];
            tensor<fp32, [1024]> const_21 = const()[name = tensor<string, []>("const_21"), val = tensor<fp32, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110202176)))];
            tensor<fp32, [1, 1, 1024]> var_1053 = mul(x = const_21, y = hidden_states_33)[name = tensor<string, []>("op_1053")];
            tensor<fp32, []> var_1054 = const()[name = tensor<string, []>("op_1054"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_1055 = add(x = variance_17, y = var_1054)[name = tensor<string, []>("op_1055")];
            tensor<fp32, []> var_1056_epsilon_0 = const()[name = tensor<string, []>("op_1056_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_1056 = rsqrt(epsilon = var_1056_epsilon_0, x = var_1055)[name = tensor<string, []>("op_1056")];
            tensor<fp32, [1, 1, 1024]> input_21 = mul(x = var_1053, y = var_1056)[name = tensor<string, []>("input_21")];
            tensor<fp32, [1, 1, 2048]> var_1060 = linear(bias = linear_0_bias_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = input_21)[name = tensor<string, []>("linear_14")];
            tensor<int32, [4]> var_1065 = const()[name = tensor<string, []>("op_1065"), val = tensor<int32, [4]>([1, 1, 16, 128])];
            tensor<fp32, [1, 1, 16, 128]> var_1066 = reshape(shape = var_1065, x = var_1060)[name = tensor<string, []>("op_1066")];
            tensor<fp32, [1, 1, 1024]> var_1072 = linear(bias = linear_1_bias_0, weight = layers_2_self_attn_k_proj_weight_palettized, x = input_21)[name = tensor<string, []>("linear_15")];
            tensor<int32, [4]> var_1077 = const()[name = tensor<string, []>("op_1077"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<fp32, [1, 1, 8, 128]> var_1078 = reshape(shape = var_1077, x = var_1072)[name = tensor<string, []>("op_1078")];
            tensor<fp32, [1, 1, 1024]> var_1084 = linear(bias = linear_1_bias_0, weight = layers_2_self_attn_v_proj_weight_palettized, x = input_21)[name = tensor<string, []>("linear_16")];
            tensor<fp32, []> var_1097_promoted = const()[name = tensor<string, []>("op_1097_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 16, 128]> var_1103 = pow(x = var_1066, y = var_1097_promoted)[name = tensor<string, []>("op_1103")];
            tensor<bool, []> variance_19_keep_dims_0 = const()[name = tensor<string, []>("variance_19_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<int32, [1]> const_60 = const()[name = tensor<string, []>("const_60"), val = tensor<int32, [1]>([3])];
            tensor<fp32, [1, 1, 16, 1]> variance_19 = reduce_mean(axes = const_60, keep_dims = variance_19_keep_dims_0, x = var_1103)[name = tensor<string, []>("variance_19")];
            tensor<fp32, [1, 1, 1, 128]> const_61 = const()[name = tensor<string, []>("const_61"), val = tensor<fp32, [1, 1, 1, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110206336)))];
            tensor<fp32, [1, 1, 16, 128]> var_1107 = mul(x = const_61, y = var_1066)[name = tensor<string, []>("op_1107")];
            tensor<fp32, []> var_1108 = const()[name = tensor<string, []>("op_1108"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 16, 1]> var_1109 = add(x = variance_19, y = var_1108)[name = tensor<string, []>("op_1109")];
            tensor<fp32, []> var_1110_epsilon_0 = const()[name = tensor<string, []>("op_1110_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 16, 1]> var_1110 = rsqrt(epsilon = var_1110_epsilon_0, x = var_1109)[name = tensor<string, []>("op_1110")];
            tensor<fp32, [1, 1, 16, 128]> q_13 = mul(x = var_1107, y = var_1110)[name = tensor<string, []>("q_13")];
            tensor<fp32, []> var_1115_promoted = const()[name = tensor<string, []>("op_1115_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 8, 128]> var_1121 = pow(x = var_1078, y = var_1115_promoted)[name = tensor<string, []>("op_1121")];
            tensor<bool, []> variance_21_keep_dims_0 = const()[name = tensor<string, []>("variance_21_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<int32, [1]> const_62 = const()[name = tensor<string, []>("const_62"), val = tensor<int32, [1]>([3])];
            tensor<fp32, [1, 1, 8, 1]> variance_21 = reduce_mean(axes = const_62, keep_dims = variance_21_keep_dims_0, x = var_1121)[name = tensor<string, []>("variance_21")];
            tensor<fp32, [1, 1, 1, 128]> const_63 = const()[name = tensor<string, []>("const_63"), val = tensor<fp32, [1, 1, 1, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110206912)))];
            tensor<fp32, [1, 1, 8, 128]> var_1125 = mul(x = const_63, y = var_1078)[name = tensor<string, []>("op_1125")];
            tensor<fp32, []> var_1126 = const()[name = tensor<string, []>("op_1126"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 8, 1]> var_1127 = add(x = variance_21, y = var_1126)[name = tensor<string, []>("op_1127")];
            tensor<fp32, []> var_1128_epsilon_0 = const()[name = tensor<string, []>("op_1128_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 8, 1]> var_1128 = rsqrt(epsilon = var_1128_epsilon_0, x = var_1127)[name = tensor<string, []>("op_1128")];
            tensor<fp32, [1, 1, 8, 128]> k_13 = mul(x = var_1125, y = var_1128)[name = tensor<string, []>("k_13")];
            tensor<fp32, [1, 1, 16, 128]> var_1143 = mul(x = q_13, y = cos_r_1)[name = tensor<string, []>("op_1143")];
            tensor<int32, [4]> x1_9_begin_0 = const()[name = tensor<string, []>("x1_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_9_end_0 = const()[name = tensor<string, []>("x1_9_end_0"), val = tensor<int32, [4]>([1, 1, 16, 64])];
            tensor<bool, [4]> x1_9_end_mask_0 = const()[name = tensor<string, []>("x1_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 1, 16, 64]> x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_13)[name = tensor<string, []>("x1_9")];
            tensor<int32, [4]> x2_9_begin_0 = const()[name = tensor<string, []>("x2_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_9_end_0 = const()[name = tensor<string, []>("x2_9_end_0"), val = tensor<int32, [4]>([1, 1, 16, 128])];
            tensor<bool, [4]> x2_9_end_mask_0 = const()[name = tensor<string, []>("x2_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 1, 16, 64]> x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_13)[name = tensor<string, []>("x2_9")];
            tensor<fp32, []> const_26_promoted = const()[name = tensor<string, []>("const_26_promoted"), val = tensor<fp32, []>(-0x1p+0)];
            tensor<fp32, [1, 1, 16, 64]> var_1164 = mul(x = x2_9, y = const_26_promoted)[name = tensor<string, []>("op_1164")];
            tensor<int32, []> var_1166 = const()[name = tensor<string, []>("op_1166"), val = tensor<int32, []>(-1)];
            tensor<bool, []> var_1167_interleave_0 = const()[name = tensor<string, []>("op_1167_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 1, 16, 128]> var_1167 = concat(axis = var_1166, interleave = var_1167_interleave_0, values = (var_1164, x1_9))[name = tensor<string, []>("op_1167")];
            tensor<fp32, [1, 1, 16, 128]> var_1168 = mul(x = var_1167, y = sin_r_1)[name = tensor<string, []>("op_1168")];
            tensor<fp32, [1, 1, 16, 128]> q_17 = add(x = var_1143, y = var_1168)[name = tensor<string, []>("q_17")];
            tensor<fp32, [1, 1, 8, 128]> var_1171 = mul(x = k_13, y = cos_r_1)[name = tensor<string, []>("op_1171")];
            tensor<int32, [4]> x1_11_begin_0 = const()[name = tensor<string, []>("x1_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_11_end_0 = const()[name = tensor<string, []>("x1_11_end_0"), val = tensor<int32, [4]>([1, 1, 8, 64])];
            tensor<bool, [4]> x1_11_end_mask_0 = const()[name = tensor<string, []>("x1_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 1, 8, 64]> x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_13)[name = tensor<string, []>("x1_11")];
            tensor<int32, [4]> x2_11_begin_0 = const()[name = tensor<string, []>("x2_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_11_end_0 = const()[name = tensor<string, []>("x2_11_end_0"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<bool, [4]> x2_11_end_mask_0 = const()[name = tensor<string, []>("x2_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 1, 8, 64]> x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_13)[name = tensor<string, []>("x2_11")];
            tensor<fp32, []> const_29_promoted = const()[name = tensor<string, []>("const_29_promoted"), val = tensor<fp32, []>(-0x1p+0)];
            tensor<fp32, [1, 1, 8, 64]> var_1192 = mul(x = x2_11, y = const_29_promoted)[name = tensor<string, []>("op_1192")];
            tensor<int32, []> var_1194 = const()[name = tensor<string, []>("op_1194"), val = tensor<int32, []>(-1)];
            tensor<bool, []> var_1195_interleave_0 = const()[name = tensor<string, []>("op_1195_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 1, 8, 128]> var_1195 = concat(axis = var_1194, interleave = var_1195_interleave_0, values = (var_1192, x1_11))[name = tensor<string, []>("op_1195")];
            tensor<fp32, [1, 1, 8, 128]> var_1196 = mul(x = var_1195, y = sin_r_1)[name = tensor<string, []>("op_1196")];
            tensor<fp32, [1, 1, 8, 128]> k_17 = add(x = var_1171, y = var_1196)[name = tensor<string, []>("k_17")];
            tensor<int32, [4]> var_1203 = const()[name = tensor<string, []>("op_1203"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 1024, 1, 1]> nk_5 = reshape(shape = var_1203, x = k_17)[name = tensor<string, []>("nk_5")];
            tensor<int32, [4]> var_1209 = const()[name = tensor<string, []>("op_1209"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 1024, 1, 1]> nv_5 = reshape(shape = var_1209, x = var_1084)[name = tensor<string, []>("nv_5")];
            tensor<fp32, [1, 1024, 1, 16]> var_1214 = mul(x = var_1003, y = var_473)[name = tensor<string, []>("op_1214")];
            tensor<fp32, [1, 1024, 1, 16]> var_1215 = mul(x = nk_5, y = update_mask)[name = tensor<string, []>("op_1215")];
            tensor<fp32, [1, 1024, 1, 16]> lkc_11 = add(x = var_1214, y = var_1215)[name = tensor<string, []>("lkc_11")];
            tensor<fp32, [1, 1024, 1, 16]> var_1221 = mul(x = var_1023, y = var_473)[name = tensor<string, []>("op_1221")];
            tensor<fp32, [1, 1024, 1, 16]> var_1222 = mul(x = nv_5, y = update_mask)[name = tensor<string, []>("op_1222")];
            tensor<fp32, [1, 1024, 1, 16]> lvc_11 = add(x = var_1221, y = var_1222)[name = tensor<string, []>("lvc_11")];
            tensor<int32, [1]> var_1226_axes_0 = const()[name = tensor<string, []>("op_1226_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_1226 = squeeze(axes = var_1226_axes_0, x = lkc_11)[name = tensor<string, []>("op_1226")];
            tensor<int32, [4]> var_1231 = const()[name = tensor<string, []>("op_1231"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> kc_9 = reshape(shape = var_1231, x = var_1226)[name = tensor<string, []>("kc_9")];
            tensor<int32, [1]> var_1234_axes_0 = const()[name = tensor<string, []>("op_1234_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_1234 = squeeze(axes = var_1234_axes_0, x = lvc_11)[name = tensor<string, []>("op_1234")];
            tensor<int32, [4]> var_1239 = const()[name = tensor<string, []>("op_1239"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> vc_9 = reshape(shape = var_1239, x = var_1234)[name = tensor<string, []>("vc_9")];
            tensor<int32, [1]> var_1242_axes_0 = const()[name = tensor<string, []>("op_1242_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_1242 = expand_dims(axes = var_1242_axes_0, x = kc_9)[name = tensor<string, []>("op_1242")];
            tensor<int32, [5]> var_1250_reps_0 = const()[name = tensor<string, []>("op_1250_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_1250 = tile(reps = var_1250_reps_0, x = var_1242)[name = tensor<string, []>("op_1250")];
            tensor<int32, [4]> var_1255 = const()[name = tensor<string, []>("op_1255"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> kc_11 = reshape(shape = var_1255, x = var_1250)[name = tensor<string, []>("kc_11")];
            tensor<int32, [1]> var_1258_axes_0 = const()[name = tensor<string, []>("op_1258_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_1258 = expand_dims(axes = var_1258_axes_0, x = vc_9)[name = tensor<string, []>("op_1258")];
            tensor<int32, [5]> var_1266_reps_0 = const()[name = tensor<string, []>("op_1266_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_1266 = tile(reps = var_1266_reps_0, x = var_1258)[name = tensor<string, []>("op_1266")];
            tensor<int32, [4]> var_1271 = const()[name = tensor<string, []>("op_1271"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> vc_11 = reshape(shape = var_1271, x = var_1266)[name = tensor<string, []>("vc_11")];
            tensor<int32, [4]> var_1275_perm_0 = const()[name = tensor<string, []>("op_1275_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_1276_transpose_x_0 = const()[name = tensor<string, []>("op_1276_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1276_transpose_y_0 = const()[name = tensor<string, []>("op_1276_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 16, 1, 128]> var_1275 = transpose(perm = var_1275_perm_0, x = q_17)[name = tensor<string, []>("transpose_5")];
            tensor<fp32, [1, 16, 1, 16]> var_1276 = matmul(transpose_x = var_1276_transpose_x_0, transpose_y = var_1276_transpose_y_0, x = var_1275, y = kc_11)[name = tensor<string, []>("op_1276")];
            tensor<fp32, []> _inversed_aw_17_y_0 = const()[name = tensor<string, []>("_inversed_aw_17_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-4)];
            tensor<fp32, [1, 16, 1, 16]> _inversed_aw_17 = mul(x = var_1276, y = _inversed_aw_17_y_0)[name = tensor<string, []>("_inversed_aw_17")];
            tensor<fp32, [1, 16, 1, 16]> aw_19 = add(x = _inversed_aw_17, y = var_542)[name = tensor<string, []>("aw_19")];
            tensor<int32, []> var_1290 = const()[name = tensor<string, []>("op_1290"), val = tensor<int32, []>(-1)];
            tensor<fp32, [1, 16, 1, 16]> aw_23 = softmax(axis = var_1290, x = aw_19)[name = tensor<string, []>("aw_23")];
            tensor<bool, []> var_1296_transpose_x_1 = const()[name = tensor<string, []>("op_1296_transpose_x_1"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1296_transpose_y_1 = const()[name = tensor<string, []>("op_1296_transpose_y_1"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, 16, 1, 128]> var_1296 = matmul(transpose_x = var_1296_transpose_x_1, transpose_y = var_1296_transpose_y_1, x = aw_23, y = vc_11)[name = tensor<string, []>("op_1296")];
            tensor<int32, [4]> var_1299_perm_0 = const()[name = tensor<string, []>("op_1299_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1303 = const()[name = tensor<string, []>("op_1303"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp32, [1, 1, 16, 128]> var_1299 = transpose(perm = var_1299_perm_0, x = var_1296)[name = tensor<string, []>("transpose_4")];
            tensor<fp32, [1, 1, 2048]> input_23 = reshape(shape = var_1303, x = var_1299)[name = tensor<string, []>("input_23")];
            tensor<fp32, [1, 1, 1024]> var_1307 = linear(bias = linear_1_bias_0, weight = layers_2_self_attn_o_proj_weight_palettized, x = input_23)[name = tensor<string, []>("linear_17")];
            tensor<int32, [1]> var_1309_axes_0 = const()[name = tensor<string, []>("op_1309_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_1309 = squeeze(axes = var_1309_axes_0, x = var_1307)[name = tensor<string, []>("op_1309")];
            tensor<int32, [1]> var_1311_axes_0 = const()[name = tensor<string, []>("op_1311_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_1311 = squeeze(axes = var_1311_axes_0, x = var_1309)[name = tensor<string, []>("op_1311")];
            tensor<int32, [1]> var_1313_axes_0 = const()[name = tensor<string, []>("op_1313_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_1313 = expand_dims(axes = var_1313_axes_0, x = var_1311)[name = tensor<string, []>("op_1313")];
            tensor<int32, [1]> ao_5_axes_0 = const()[name = tensor<string, []>("ao_5_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> ao_5 = expand_dims(axes = ao_5_axes_0, x = var_1313)[name = tensor<string, []>("ao_5")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_9 = add(x = hidden_7, y = ao_5)[name = tensor<string, []>("hidden_9")];
            tensor<int32, [1]> var_1319_axes_0 = const()[name = tensor<string, []>("op_1319_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_1319 = squeeze(axes = var_1319_axes_0, x = hidden_9)[name = tensor<string, []>("op_1319")];
            tensor<int32, [1]> var_1321_axes_0 = const()[name = tensor<string, []>("op_1321_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_1321 = squeeze(axes = var_1321_axes_0, x = var_1319)[name = tensor<string, []>("op_1321")];
            tensor<int32, [1]> hidden_states_45_axes_0 = const()[name = tensor<string, []>("hidden_states_45_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_45 = expand_dims(axes = hidden_states_45_axes_0, x = var_1321)[name = tensor<string, []>("hidden_states_45")];
            tensor<fp32, []> var_1327_promoted = const()[name = tensor<string, []>("op_1327_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_1333 = pow(x = hidden_states_45, y = var_1327_promoted)[name = tensor<string, []>("op_1333")];
            tensor<int32, [1]> variance_23_axes_0 = const()[name = tensor<string, []>("variance_23_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_23_keep_dims_0 = const()[name = tensor<string, []>("variance_23_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, 1, 1]> variance_23 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_1333)[name = tensor<string, []>("variance_23")];
            tensor<fp32, [1024]> const_30 = const()[name = tensor<string, []>("const_30"), val = tensor<fp32, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110207488)))];
            tensor<fp32, [1, 1, 1024]> var_1337 = mul(x = const_30, y = hidden_states_45)[name = tensor<string, []>("op_1337")];
            tensor<fp32, []> var_1338 = const()[name = tensor<string, []>("op_1338"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_1339 = add(x = variance_23, y = var_1338)[name = tensor<string, []>("op_1339")];
            tensor<fp32, []> var_1340_epsilon_0 = const()[name = tensor<string, []>("op_1340_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_1340 = rsqrt(epsilon = var_1340_epsilon_0, x = var_1339)[name = tensor<string, []>("op_1340")];
            tensor<fp32, [1, 1, 1024]> input_25 = mul(x = var_1337, y = var_1340)[name = tensor<string, []>("input_25")];
            tensor<fp32, [1, 1, 3072]> input_27 = linear(bias = linear_4_bias_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_25)[name = tensor<string, []>("linear_18")];
            tensor<fp32, [1, 1, 3072]> var_1348 = silu(x = input_27)[name = tensor<string, []>("op_1348")];
            tensor<fp32, [1, 1, 3072]> var_1350 = linear(bias = linear_4_bias_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_25)[name = tensor<string, []>("linear_19")];
            tensor<fp32, [1, 1, 3072]> input_29 = mul(x = var_1348, y = var_1350)[name = tensor<string, []>("input_29")];
            tensor<fp32, [1, 1, 1024]> var_1353 = linear(bias = linear_1_bias_0, weight = layers_2_mlp_down_proj_weight_palettized, x = input_29)[name = tensor<string, []>("linear_20")];
            tensor<int32, [1]> var_1355_axes_0 = const()[name = tensor<string, []>("op_1355_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_1355 = squeeze(axes = var_1355_axes_0, x = var_1353)[name = tensor<string, []>("op_1355")];
            tensor<int32, [1]> var_1357_axes_0 = const()[name = tensor<string, []>("op_1357_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_1357 = squeeze(axes = var_1357_axes_0, x = var_1355)[name = tensor<string, []>("op_1357")];
            tensor<int32, [1]> var_1359_axes_0 = const()[name = tensor<string, []>("op_1359_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_1359 = expand_dims(axes = var_1359_axes_0, x = var_1357)[name = tensor<string, []>("op_1359")];
            tensor<int32, [1]> h_5_axes_0 = const()[name = tensor<string, []>("h_5_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> h_5 = expand_dims(axes = h_5_axes_0, x = var_1359)[name = tensor<string, []>("h_5")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_11 = add(x = hidden_9, y = h_5)[name = tensor<string, []>("hidden_11")];
            tensor<int32, [4]> var_1373_begin_0 = const()[name = tensor<string, []>("op_1373_begin_0"), val = tensor<int32, [4]>([0, 3072, 0, 0])];
            tensor<int32, [4]> var_1373_end_0 = const()[name = tensor<string, []>("op_1373_end_0"), val = tensor<int32, [4]>([1, 4096, 1, 16])];
            tensor<bool, [4]> var_1373_end_mask_0 = const()[name = tensor<string, []>("op_1373_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp32, [1, 1024, 1, 16]> var_1373 = slice_by_index(begin = var_1373_begin_0, end = var_1373_end_0, end_mask = var_1373_end_mask_0, x = cast_1)[name = tensor<string, []>("op_1373")];
            tensor<int32, [4]> var_1393_begin_0 = const()[name = tensor<string, []>("op_1393_begin_0"), val = tensor<int32, [4]>([0, 3072, 0, 0])];
            tensor<int32, [4]> var_1393_end_0 = const()[name = tensor<string, []>("op_1393_end_0"), val = tensor<int32, [4]>([1, 4096, 1, 16])];
            tensor<bool, [4]> var_1393_end_mask_0 = const()[name = tensor<string, []>("op_1393_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp32, [1, 1024, 1, 16]> var_1393 = slice_by_index(begin = var_1393_begin_0, end = var_1393_end_0, end_mask = var_1393_end_mask_0, x = cast_4)[name = tensor<string, []>("op_1393")];
            tensor<int32, [1]> var_1405_axes_0 = const()[name = tensor<string, []>("op_1405_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_1405 = squeeze(axes = var_1405_axes_0, x = hidden_11)[name = tensor<string, []>("op_1405")];
            tensor<int32, [1]> var_1407_axes_0 = const()[name = tensor<string, []>("op_1407_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_1407 = squeeze(axes = var_1407_axes_0, x = var_1405)[name = tensor<string, []>("op_1407")];
            tensor<int32, [1]> hidden_states_49_axes_0 = const()[name = tensor<string, []>("hidden_states_49_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_49 = expand_dims(axes = hidden_states_49_axes_0, x = var_1407)[name = tensor<string, []>("hidden_states_49")];
            tensor<fp32, []> var_1413_promoted = const()[name = tensor<string, []>("op_1413_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_1419 = pow(x = hidden_states_49, y = var_1413_promoted)[name = tensor<string, []>("op_1419")];
            tensor<int32, [1]> variance_25_axes_0 = const()[name = tensor<string, []>("variance_25_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_25_keep_dims_0 = const()[name = tensor<string, []>("variance_25_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, 1, 1]> variance_25 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_1419)[name = tensor<string, []>("variance_25")];
            tensor<fp32, [1024]> const_31 = const()[name = tensor<string, []>("const_31"), val = tensor<fp32, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110211648)))];
            tensor<fp32, [1, 1, 1024]> var_1423 = mul(x = const_31, y = hidden_states_49)[name = tensor<string, []>("op_1423")];
            tensor<fp32, []> var_1424 = const()[name = tensor<string, []>("op_1424"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_1425 = add(x = variance_25, y = var_1424)[name = tensor<string, []>("op_1425")];
            tensor<fp32, []> var_1426_epsilon_0 = const()[name = tensor<string, []>("op_1426_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_1426 = rsqrt(epsilon = var_1426_epsilon_0, x = var_1425)[name = tensor<string, []>("op_1426")];
            tensor<fp32, [1, 1, 1024]> input_31 = mul(x = var_1423, y = var_1426)[name = tensor<string, []>("input_31")];
            tensor<fp32, [1, 1, 2048]> var_1430 = linear(bias = linear_0_bias_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = input_31)[name = tensor<string, []>("linear_21")];
            tensor<int32, [4]> var_1435 = const()[name = tensor<string, []>("op_1435"), val = tensor<int32, [4]>([1, 1, 16, 128])];
            tensor<fp32, [1, 1, 16, 128]> var_1436 = reshape(shape = var_1435, x = var_1430)[name = tensor<string, []>("op_1436")];
            tensor<fp32, [1, 1, 1024]> var_1442 = linear(bias = linear_1_bias_0, weight = layers_3_self_attn_k_proj_weight_palettized, x = input_31)[name = tensor<string, []>("linear_22")];
            tensor<int32, [4]> var_1447 = const()[name = tensor<string, []>("op_1447"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<fp32, [1, 1, 8, 128]> var_1448 = reshape(shape = var_1447, x = var_1442)[name = tensor<string, []>("op_1448")];
            tensor<fp32, [1, 1, 1024]> var_1454 = linear(bias = linear_1_bias_0, weight = layers_3_self_attn_v_proj_weight_palettized, x = input_31)[name = tensor<string, []>("linear_23")];
            tensor<fp32, []> var_1467_promoted = const()[name = tensor<string, []>("op_1467_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 16, 128]> var_1473 = pow(x = var_1436, y = var_1467_promoted)[name = tensor<string, []>("op_1473")];
            tensor<bool, []> variance_27_keep_dims_0 = const()[name = tensor<string, []>("variance_27_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<int32, [1]> const_64 = const()[name = tensor<string, []>("const_64"), val = tensor<int32, [1]>([3])];
            tensor<fp32, [1, 1, 16, 1]> variance_27 = reduce_mean(axes = const_64, keep_dims = variance_27_keep_dims_0, x = var_1473)[name = tensor<string, []>("variance_27")];
            tensor<fp32, [1, 1, 1, 128]> const_65 = const()[name = tensor<string, []>("const_65"), val = tensor<fp32, [1, 1, 1, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110215808)))];
            tensor<fp32, [1, 1, 16, 128]> var_1477 = mul(x = const_65, y = var_1436)[name = tensor<string, []>("op_1477")];
            tensor<fp32, []> var_1478 = const()[name = tensor<string, []>("op_1478"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 16, 1]> var_1479 = add(x = variance_27, y = var_1478)[name = tensor<string, []>("op_1479")];
            tensor<fp32, []> var_1480_epsilon_0 = const()[name = tensor<string, []>("op_1480_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 16, 1]> var_1480 = rsqrt(epsilon = var_1480_epsilon_0, x = var_1479)[name = tensor<string, []>("op_1480")];
            tensor<fp32, [1, 1, 16, 128]> q_19 = mul(x = var_1477, y = var_1480)[name = tensor<string, []>("q_19")];
            tensor<fp32, []> var_1485_promoted = const()[name = tensor<string, []>("op_1485_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 8, 128]> var_1491 = pow(x = var_1448, y = var_1485_promoted)[name = tensor<string, []>("op_1491")];
            tensor<bool, []> variance_29_keep_dims_0 = const()[name = tensor<string, []>("variance_29_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<int32, [1]> const_66 = const()[name = tensor<string, []>("const_66"), val = tensor<int32, [1]>([3])];
            tensor<fp32, [1, 1, 8, 1]> variance_29 = reduce_mean(axes = const_66, keep_dims = variance_29_keep_dims_0, x = var_1491)[name = tensor<string, []>("variance_29")];
            tensor<fp32, [1, 1, 1, 128]> const_67 = const()[name = tensor<string, []>("const_67"), val = tensor<fp32, [1, 1, 1, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110216384)))];
            tensor<fp32, [1, 1, 8, 128]> var_1495 = mul(x = const_67, y = var_1448)[name = tensor<string, []>("op_1495")];
            tensor<fp32, []> var_1496 = const()[name = tensor<string, []>("op_1496"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 8, 1]> var_1497 = add(x = variance_29, y = var_1496)[name = tensor<string, []>("op_1497")];
            tensor<fp32, []> var_1498_epsilon_0 = const()[name = tensor<string, []>("op_1498_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 8, 1]> var_1498 = rsqrt(epsilon = var_1498_epsilon_0, x = var_1497)[name = tensor<string, []>("op_1498")];
            tensor<fp32, [1, 1, 8, 128]> k_19 = mul(x = var_1495, y = var_1498)[name = tensor<string, []>("k_19")];
            tensor<fp32, [1, 1, 16, 128]> var_1513 = mul(x = q_19, y = cos_r_1)[name = tensor<string, []>("op_1513")];
            tensor<int32, [4]> x1_13_begin_0 = const()[name = tensor<string, []>("x1_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_13_end_0 = const()[name = tensor<string, []>("x1_13_end_0"), val = tensor<int32, [4]>([1, 1, 16, 64])];
            tensor<bool, [4]> x1_13_end_mask_0 = const()[name = tensor<string, []>("x1_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 1, 16, 64]> x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_19)[name = tensor<string, []>("x1_13")];
            tensor<int32, [4]> x2_13_begin_0 = const()[name = tensor<string, []>("x2_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_13_end_0 = const()[name = tensor<string, []>("x2_13_end_0"), val = tensor<int32, [4]>([1, 1, 16, 128])];
            tensor<bool, [4]> x2_13_end_mask_0 = const()[name = tensor<string, []>("x2_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 1, 16, 64]> x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_19)[name = tensor<string, []>("x2_13")];
            tensor<fp32, []> const_36_promoted = const()[name = tensor<string, []>("const_36_promoted"), val = tensor<fp32, []>(-0x1p+0)];
            tensor<fp32, [1, 1, 16, 64]> var_1534 = mul(x = x2_13, y = const_36_promoted)[name = tensor<string, []>("op_1534")];
            tensor<int32, []> var_1536 = const()[name = tensor<string, []>("op_1536"), val = tensor<int32, []>(-1)];
            tensor<bool, []> var_1537_interleave_0 = const()[name = tensor<string, []>("op_1537_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 1, 16, 128]> var_1537 = concat(axis = var_1536, interleave = var_1537_interleave_0, values = (var_1534, x1_13))[name = tensor<string, []>("op_1537")];
            tensor<fp32, [1, 1, 16, 128]> var_1538 = mul(x = var_1537, y = sin_r_1)[name = tensor<string, []>("op_1538")];
            tensor<fp32, [1, 1, 16, 128]> q_23 = add(x = var_1513, y = var_1538)[name = tensor<string, []>("q_23")];
            tensor<fp32, [1, 1, 8, 128]> var_1541 = mul(x = k_19, y = cos_r_1)[name = tensor<string, []>("op_1541")];
            tensor<int32, [4]> x1_15_begin_0 = const()[name = tensor<string, []>("x1_15_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_15_end_0 = const()[name = tensor<string, []>("x1_15_end_0"), val = tensor<int32, [4]>([1, 1, 8, 64])];
            tensor<bool, [4]> x1_15_end_mask_0 = const()[name = tensor<string, []>("x1_15_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 1, 8, 64]> x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_19)[name = tensor<string, []>("x1_15")];
            tensor<int32, [4]> x2_15_begin_0 = const()[name = tensor<string, []>("x2_15_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_15_end_0 = const()[name = tensor<string, []>("x2_15_end_0"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<bool, [4]> x2_15_end_mask_0 = const()[name = tensor<string, []>("x2_15_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 1, 8, 64]> x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_19)[name = tensor<string, []>("x2_15")];
            tensor<fp32, []> const_39_promoted = const()[name = tensor<string, []>("const_39_promoted"), val = tensor<fp32, []>(-0x1p+0)];
            tensor<fp32, [1, 1, 8, 64]> var_1562 = mul(x = x2_15, y = const_39_promoted)[name = tensor<string, []>("op_1562")];
            tensor<int32, []> var_1564 = const()[name = tensor<string, []>("op_1564"), val = tensor<int32, []>(-1)];
            tensor<bool, []> var_1565_interleave_0 = const()[name = tensor<string, []>("op_1565_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 1, 8, 128]> var_1565 = concat(axis = var_1564, interleave = var_1565_interleave_0, values = (var_1562, x1_15))[name = tensor<string, []>("op_1565")];
            tensor<fp32, [1, 1, 8, 128]> var_1566 = mul(x = var_1565, y = sin_r_1)[name = tensor<string, []>("op_1566")];
            tensor<fp32, [1, 1, 8, 128]> k_23 = add(x = var_1541, y = var_1566)[name = tensor<string, []>("k_23")];
            tensor<int32, [4]> var_1573 = const()[name = tensor<string, []>("op_1573"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 1024, 1, 1]> nk_7 = reshape(shape = var_1573, x = k_23)[name = tensor<string, []>("nk_7")];
            tensor<int32, [4]> var_1579 = const()[name = tensor<string, []>("op_1579"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 1024, 1, 1]> nv_7 = reshape(shape = var_1579, x = var_1454)[name = tensor<string, []>("nv_7")];
            tensor<fp32, [1, 1024, 1, 16]> var_1584 = mul(x = var_1373, y = var_473)[name = tensor<string, []>("op_1584")];
            tensor<fp32, [1, 1024, 1, 16]> var_1585 = mul(x = nk_7, y = update_mask)[name = tensor<string, []>("op_1585")];
            tensor<fp32, [1, 1024, 1, 16]> lkc_15 = add(x = var_1584, y = var_1585)[name = tensor<string, []>("lkc_15")];
            tensor<fp32, [1, 1024, 1, 16]> var_1591 = mul(x = var_1393, y = var_473)[name = tensor<string, []>("op_1591")];
            tensor<fp32, [1, 1024, 1, 16]> var_1592 = mul(x = nv_7, y = update_mask)[name = tensor<string, []>("op_1592")];
            tensor<fp32, [1, 1024, 1, 16]> lvc_15 = add(x = var_1591, y = var_1592)[name = tensor<string, []>("lvc_15")];
            tensor<int32, [1]> var_1596_axes_0 = const()[name = tensor<string, []>("op_1596_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_1596 = squeeze(axes = var_1596_axes_0, x = lkc_15)[name = tensor<string, []>("op_1596")];
            tensor<int32, [4]> var_1601 = const()[name = tensor<string, []>("op_1601"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> kc_13 = reshape(shape = var_1601, x = var_1596)[name = tensor<string, []>("kc_13")];
            tensor<int32, [1]> var_1604_axes_0 = const()[name = tensor<string, []>("op_1604_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_1604 = squeeze(axes = var_1604_axes_0, x = lvc_15)[name = tensor<string, []>("op_1604")];
            tensor<int32, [4]> var_1609 = const()[name = tensor<string, []>("op_1609"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> vc_13 = reshape(shape = var_1609, x = var_1604)[name = tensor<string, []>("vc_13")];
            tensor<int32, [1]> var_1612_axes_0 = const()[name = tensor<string, []>("op_1612_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_1612 = expand_dims(axes = var_1612_axes_0, x = kc_13)[name = tensor<string, []>("op_1612")];
            tensor<int32, [5]> var_1620_reps_0 = const()[name = tensor<string, []>("op_1620_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_1620 = tile(reps = var_1620_reps_0, x = var_1612)[name = tensor<string, []>("op_1620")];
            tensor<int32, [4]> var_1625 = const()[name = tensor<string, []>("op_1625"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> kc_15 = reshape(shape = var_1625, x = var_1620)[name = tensor<string, []>("kc_15")];
            tensor<int32, [1]> var_1628_axes_0 = const()[name = tensor<string, []>("op_1628_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_1628 = expand_dims(axes = var_1628_axes_0, x = vc_13)[name = tensor<string, []>("op_1628")];
            tensor<int32, [5]> var_1636_reps_0 = const()[name = tensor<string, []>("op_1636_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_1636 = tile(reps = var_1636_reps_0, x = var_1628)[name = tensor<string, []>("op_1636")];
            tensor<int32, [4]> var_1641 = const()[name = tensor<string, []>("op_1641"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> vc_15 = reshape(shape = var_1641, x = var_1636)[name = tensor<string, []>("vc_15")];
            tensor<int32, [4]> var_1645_perm_0 = const()[name = tensor<string, []>("op_1645_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_1646_transpose_x_0 = const()[name = tensor<string, []>("op_1646_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1646_transpose_y_0 = const()[name = tensor<string, []>("op_1646_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 16, 1, 128]> var_1645 = transpose(perm = var_1645_perm_0, x = q_23)[name = tensor<string, []>("transpose_3")];
            tensor<fp32, [1, 16, 1, 16]> var_1646 = matmul(transpose_x = var_1646_transpose_x_0, transpose_y = var_1646_transpose_y_0, x = var_1645, y = kc_15)[name = tensor<string, []>("op_1646")];
            tensor<fp32, []> _inversed_aw_25_y_0 = const()[name = tensor<string, []>("_inversed_aw_25_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-4)];
            tensor<fp32, [1, 16, 1, 16]> _inversed_aw_25 = mul(x = var_1646, y = _inversed_aw_25_y_0)[name = tensor<string, []>("_inversed_aw_25")];
            tensor<fp32, [1, 16, 1, 16]> aw_27 = add(x = _inversed_aw_25, y = var_542)[name = tensor<string, []>("aw_27")];
            tensor<int32, []> var_1660 = const()[name = tensor<string, []>("op_1660"), val = tensor<int32, []>(-1)];
            tensor<fp32, [1, 16, 1, 16]> aw_31 = softmax(axis = var_1660, x = aw_27)[name = tensor<string, []>("aw_31")];
            tensor<bool, []> var_1666_transpose_x_1 = const()[name = tensor<string, []>("op_1666_transpose_x_1"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1666_transpose_y_1 = const()[name = tensor<string, []>("op_1666_transpose_y_1"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, 16, 1, 128]> var_1666 = matmul(transpose_x = var_1666_transpose_x_1, transpose_y = var_1666_transpose_y_1, x = aw_31, y = vc_15)[name = tensor<string, []>("op_1666")];
            tensor<int32, [4]> var_1669_perm_0 = const()[name = tensor<string, []>("op_1669_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1673 = const()[name = tensor<string, []>("op_1673"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp32, [1, 1, 16, 128]> var_1669 = transpose(perm = var_1669_perm_0, x = var_1666)[name = tensor<string, []>("transpose_2")];
            tensor<fp32, [1, 1, 2048]> input_33 = reshape(shape = var_1673, x = var_1669)[name = tensor<string, []>("input_33")];
            tensor<fp32, [1, 1, 1024]> var_1677 = linear(bias = linear_1_bias_0, weight = layers_3_self_attn_o_proj_weight_palettized, x = input_33)[name = tensor<string, []>("linear_24")];
            tensor<int32, [1]> var_1679_axes_0 = const()[name = tensor<string, []>("op_1679_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_1679 = squeeze(axes = var_1679_axes_0, x = var_1677)[name = tensor<string, []>("op_1679")];
            tensor<int32, [1]> var_1681_axes_0 = const()[name = tensor<string, []>("op_1681_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_1681 = squeeze(axes = var_1681_axes_0, x = var_1679)[name = tensor<string, []>("op_1681")];
            tensor<int32, [1]> var_1683_axes_0 = const()[name = tensor<string, []>("op_1683_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_1683 = expand_dims(axes = var_1683_axes_0, x = var_1681)[name = tensor<string, []>("op_1683")];
            tensor<int32, [1]> ao_7_axes_0 = const()[name = tensor<string, []>("ao_7_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> ao_7 = expand_dims(axes = ao_7_axes_0, x = var_1683)[name = tensor<string, []>("ao_7")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_13 = add(x = hidden_11, y = ao_7)[name = tensor<string, []>("hidden_13")];
            tensor<int32, [1]> var_1689_axes_0 = const()[name = tensor<string, []>("op_1689_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_1689 = squeeze(axes = var_1689_axes_0, x = hidden_13)[name = tensor<string, []>("op_1689")];
            tensor<int32, [1]> var_1691_axes_0 = const()[name = tensor<string, []>("op_1691_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_1691 = squeeze(axes = var_1691_axes_0, x = var_1689)[name = tensor<string, []>("op_1691")];
            tensor<int32, [1]> hidden_states_61_axes_0 = const()[name = tensor<string, []>("hidden_states_61_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_61 = expand_dims(axes = hidden_states_61_axes_0, x = var_1691)[name = tensor<string, []>("hidden_states_61")];
            tensor<fp32, []> var_1697_promoted = const()[name = tensor<string, []>("op_1697_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_1703 = pow(x = hidden_states_61, y = var_1697_promoted)[name = tensor<string, []>("op_1703")];
            tensor<int32, [1]> variance_31_axes_0 = const()[name = tensor<string, []>("variance_31_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_31_keep_dims_0 = const()[name = tensor<string, []>("variance_31_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, 1, 1]> variance_31 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_1703)[name = tensor<string, []>("variance_31")];
            tensor<fp32, [1024]> const_40 = const()[name = tensor<string, []>("const_40"), val = tensor<fp32, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110216960)))];
            tensor<fp32, [1, 1, 1024]> var_1707 = mul(x = const_40, y = hidden_states_61)[name = tensor<string, []>("op_1707")];
            tensor<fp32, []> var_1708 = const()[name = tensor<string, []>("op_1708"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_1709 = add(x = variance_31, y = var_1708)[name = tensor<string, []>("op_1709")];
            tensor<fp32, []> var_1710_epsilon_0 = const()[name = tensor<string, []>("op_1710_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_1710 = rsqrt(epsilon = var_1710_epsilon_0, x = var_1709)[name = tensor<string, []>("op_1710")];
            tensor<fp32, [1, 1, 1024]> input_35 = mul(x = var_1707, y = var_1710)[name = tensor<string, []>("input_35")];
            tensor<fp32, [1, 1, 3072]> input_37 = linear(bias = linear_4_bias_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_35)[name = tensor<string, []>("linear_25")];
            tensor<fp32, [1, 1, 3072]> var_1718 = silu(x = input_37)[name = tensor<string, []>("op_1718")];
            tensor<fp32, [1, 1, 3072]> var_1720 = linear(bias = linear_4_bias_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_35)[name = tensor<string, []>("linear_26")];
            tensor<fp32, [1, 1, 3072]> input_39 = mul(x = var_1718, y = var_1720)[name = tensor<string, []>("input_39")];
            tensor<fp32, [1, 1, 1024]> var_1723 = linear(bias = linear_1_bias_0, weight = layers_3_mlp_down_proj_weight_palettized, x = input_39)[name = tensor<string, []>("linear_27")];
            tensor<int32, [1]> var_1725_axes_0 = const()[name = tensor<string, []>("op_1725_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_1725 = squeeze(axes = var_1725_axes_0, x = var_1723)[name = tensor<string, []>("op_1725")];
            tensor<int32, [1]> var_1727_axes_0 = const()[name = tensor<string, []>("op_1727_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_1727 = squeeze(axes = var_1727_axes_0, x = var_1725)[name = tensor<string, []>("op_1727")];
            tensor<int32, [1]> var_1729_axes_0 = const()[name = tensor<string, []>("op_1729_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_1729 = expand_dims(axes = var_1729_axes_0, x = var_1727)[name = tensor<string, []>("op_1729")];
            tensor<int32, [1]> h_7_axes_0 = const()[name = tensor<string, []>("h_7_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> h_7 = expand_dims(axes = h_7_axes_0, x = var_1729)[name = tensor<string, []>("h_7")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_15 = add(x = hidden_13, y = h_7)[name = tensor<string, []>("hidden_15")];
            tensor<int32, [4]> var_1743_begin_0 = const()[name = tensor<string, []>("op_1743_begin_0"), val = tensor<int32, [4]>([0, 4096, 0, 0])];
            tensor<int32, [4]> var_1743_end_0 = const()[name = tensor<string, []>("op_1743_end_0"), val = tensor<int32, [4]>([1, 1, 1, 16])];
            tensor<bool, [4]> var_1743_end_mask_0 = const()[name = tensor<string, []>("op_1743_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 1024, 1, 16]> var_1743 = slice_by_index(begin = var_1743_begin_0, end = var_1743_end_0, end_mask = var_1743_end_mask_0, x = cast_1)[name = tensor<string, []>("op_1743")];
            tensor<int32, [4]> var_1763_begin_0 = const()[name = tensor<string, []>("op_1763_begin_0"), val = tensor<int32, [4]>([0, 4096, 0, 0])];
            tensor<int32, [4]> var_1763_end_0 = const()[name = tensor<string, []>("op_1763_end_0"), val = tensor<int32, [4]>([1, 1, 1, 16])];
            tensor<bool, [4]> var_1763_end_mask_0 = const()[name = tensor<string, []>("op_1763_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 1024, 1, 16]> var_1763 = slice_by_index(begin = var_1763_begin_0, end = var_1763_end_0, end_mask = var_1763_end_mask_0, x = cast_4)[name = tensor<string, []>("op_1763")];
            tensor<int32, [1]> var_1775_axes_0 = const()[name = tensor<string, []>("op_1775_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_1775 = squeeze(axes = var_1775_axes_0, x = hidden_15)[name = tensor<string, []>("op_1775")];
            tensor<int32, [1]> var_1777_axes_0 = const()[name = tensor<string, []>("op_1777_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_1777 = squeeze(axes = var_1777_axes_0, x = var_1775)[name = tensor<string, []>("op_1777")];
            tensor<int32, [1]> hidden_states_65_axes_0 = const()[name = tensor<string, []>("hidden_states_65_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_65 = expand_dims(axes = hidden_states_65_axes_0, x = var_1777)[name = tensor<string, []>("hidden_states_65")];
            tensor<fp32, []> var_1783_promoted = const()[name = tensor<string, []>("op_1783_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_1789 = pow(x = hidden_states_65, y = var_1783_promoted)[name = tensor<string, []>("op_1789")];
            tensor<int32, [1]> variance_33_axes_0 = const()[name = tensor<string, []>("variance_33_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_33_keep_dims_0 = const()[name = tensor<string, []>("variance_33_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, 1, 1]> variance_33 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_1789)[name = tensor<string, []>("variance_33")];
            tensor<fp32, [1024]> const_41 = const()[name = tensor<string, []>("const_41"), val = tensor<fp32, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110221120)))];
            tensor<fp32, [1, 1, 1024]> var_1793 = mul(x = const_41, y = hidden_states_65)[name = tensor<string, []>("op_1793")];
            tensor<fp32, []> var_1794 = const()[name = tensor<string, []>("op_1794"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_1795 = add(x = variance_33, y = var_1794)[name = tensor<string, []>("op_1795")];
            tensor<fp32, []> var_1796_epsilon_0 = const()[name = tensor<string, []>("op_1796_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_1796 = rsqrt(epsilon = var_1796_epsilon_0, x = var_1795)[name = tensor<string, []>("op_1796")];
            tensor<fp32, [1, 1, 1024]> input_41 = mul(x = var_1793, y = var_1796)[name = tensor<string, []>("input_41")];
            tensor<fp32, [1, 1, 2048]> var_1800 = linear(bias = linear_0_bias_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = input_41)[name = tensor<string, []>("linear_28")];
            tensor<int32, [4]> var_1805 = const()[name = tensor<string, []>("op_1805"), val = tensor<int32, [4]>([1, 1, 16, 128])];
            tensor<fp32, [1, 1, 16, 128]> var_1806 = reshape(shape = var_1805, x = var_1800)[name = tensor<string, []>("op_1806")];
            tensor<fp32, [1, 1, 1024]> var_1812 = linear(bias = linear_1_bias_0, weight = layers_4_self_attn_k_proj_weight_palettized, x = input_41)[name = tensor<string, []>("linear_29")];
            tensor<int32, [4]> var_1817 = const()[name = tensor<string, []>("op_1817"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<fp32, [1, 1, 8, 128]> var_1818 = reshape(shape = var_1817, x = var_1812)[name = tensor<string, []>("op_1818")];
            tensor<fp32, [1, 1, 1024]> var_1824 = linear(bias = linear_1_bias_0, weight = layers_4_self_attn_v_proj_weight_palettized, x = input_41)[name = tensor<string, []>("linear_30")];
            tensor<fp32, []> var_1837_promoted = const()[name = tensor<string, []>("op_1837_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 16, 128]> var_1843 = pow(x = var_1806, y = var_1837_promoted)[name = tensor<string, []>("op_1843")];
            tensor<bool, []> variance_35_keep_dims_0 = const()[name = tensor<string, []>("variance_35_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<int32, [1]> const_68 = const()[name = tensor<string, []>("const_68"), val = tensor<int32, [1]>([3])];
            tensor<fp32, [1, 1, 16, 1]> variance_35 = reduce_mean(axes = const_68, keep_dims = variance_35_keep_dims_0, x = var_1843)[name = tensor<string, []>("variance_35")];
            tensor<fp32, [1, 1, 1, 128]> const_69 = const()[name = tensor<string, []>("const_69"), val = tensor<fp32, [1, 1, 1, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110225280)))];
            tensor<fp32, [1, 1, 16, 128]> var_1847 = mul(x = const_69, y = var_1806)[name = tensor<string, []>("op_1847")];
            tensor<fp32, []> var_1848 = const()[name = tensor<string, []>("op_1848"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 16, 1]> var_1849 = add(x = variance_35, y = var_1848)[name = tensor<string, []>("op_1849")];
            tensor<fp32, []> var_1850_epsilon_0 = const()[name = tensor<string, []>("op_1850_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 16, 1]> var_1850 = rsqrt(epsilon = var_1850_epsilon_0, x = var_1849)[name = tensor<string, []>("op_1850")];
            tensor<fp32, [1, 1, 16, 128]> q_25 = mul(x = var_1847, y = var_1850)[name = tensor<string, []>("q_25")];
            tensor<fp32, []> var_1855_promoted = const()[name = tensor<string, []>("op_1855_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 8, 128]> var_1861 = pow(x = var_1818, y = var_1855_promoted)[name = tensor<string, []>("op_1861")];
            tensor<bool, []> variance_37_keep_dims_0 = const()[name = tensor<string, []>("variance_37_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<int32, [1]> const_70 = const()[name = tensor<string, []>("const_70"), val = tensor<int32, [1]>([3])];
            tensor<fp32, [1, 1, 8, 1]> variance_37 = reduce_mean(axes = const_70, keep_dims = variance_37_keep_dims_0, x = var_1861)[name = tensor<string, []>("variance_37")];
            tensor<fp32, [1, 1, 1, 128]> const_71 = const()[name = tensor<string, []>("const_71"), val = tensor<fp32, [1, 1, 1, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110225856)))];
            tensor<fp32, [1, 1, 8, 128]> var_1865 = mul(x = const_71, y = var_1818)[name = tensor<string, []>("op_1865")];
            tensor<fp32, []> var_1866 = const()[name = tensor<string, []>("op_1866"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 8, 1]> var_1867 = add(x = variance_37, y = var_1866)[name = tensor<string, []>("op_1867")];
            tensor<fp32, []> var_1868_epsilon_0 = const()[name = tensor<string, []>("op_1868_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 8, 1]> var_1868 = rsqrt(epsilon = var_1868_epsilon_0, x = var_1867)[name = tensor<string, []>("op_1868")];
            tensor<fp32, [1, 1, 8, 128]> k_25 = mul(x = var_1865, y = var_1868)[name = tensor<string, []>("k_25")];
            tensor<fp32, [1, 1, 16, 128]> var_1883 = mul(x = q_25, y = cos_r_1)[name = tensor<string, []>("op_1883")];
            tensor<int32, [4]> x1_17_begin_0 = const()[name = tensor<string, []>("x1_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_17_end_0 = const()[name = tensor<string, []>("x1_17_end_0"), val = tensor<int32, [4]>([1, 1, 16, 64])];
            tensor<bool, [4]> x1_17_end_mask_0 = const()[name = tensor<string, []>("x1_17_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 1, 16, 64]> x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_25)[name = tensor<string, []>("x1_17")];
            tensor<int32, [4]> x2_17_begin_0 = const()[name = tensor<string, []>("x2_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_17_end_0 = const()[name = tensor<string, []>("x2_17_end_0"), val = tensor<int32, [4]>([1, 1, 16, 128])];
            tensor<bool, [4]> x2_17_end_mask_0 = const()[name = tensor<string, []>("x2_17_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 1, 16, 64]> x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_25)[name = tensor<string, []>("x2_17")];
            tensor<fp32, []> const_46_promoted = const()[name = tensor<string, []>("const_46_promoted"), val = tensor<fp32, []>(-0x1p+0)];
            tensor<fp32, [1, 1, 16, 64]> var_1904 = mul(x = x2_17, y = const_46_promoted)[name = tensor<string, []>("op_1904")];
            tensor<int32, []> var_1906 = const()[name = tensor<string, []>("op_1906"), val = tensor<int32, []>(-1)];
            tensor<bool, []> var_1907_interleave_0 = const()[name = tensor<string, []>("op_1907_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 1, 16, 128]> var_1907 = concat(axis = var_1906, interleave = var_1907_interleave_0, values = (var_1904, x1_17))[name = tensor<string, []>("op_1907")];
            tensor<fp32, [1, 1, 16, 128]> var_1908 = mul(x = var_1907, y = sin_r_1)[name = tensor<string, []>("op_1908")];
            tensor<fp32, [1, 1, 16, 128]> q = add(x = var_1883, y = var_1908)[name = tensor<string, []>("q")];
            tensor<fp32, [1, 1, 8, 128]> var_1911 = mul(x = k_25, y = cos_r_1)[name = tensor<string, []>("op_1911")];
            tensor<int32, [4]> x1_begin_0 = const()[name = tensor<string, []>("x1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_end_0 = const()[name = tensor<string, []>("x1_end_0"), val = tensor<int32, [4]>([1, 1, 8, 64])];
            tensor<bool, [4]> x1_end_mask_0 = const()[name = tensor<string, []>("x1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp32, [1, 1, 8, 64]> x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_25)[name = tensor<string, []>("x1")];
            tensor<int32, [4]> x2_begin_0 = const()[name = tensor<string, []>("x2_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_end_0 = const()[name = tensor<string, []>("x2_end_0"), val = tensor<int32, [4]>([1, 1, 8, 128])];
            tensor<bool, [4]> x2_end_mask_0 = const()[name = tensor<string, []>("x2_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp32, [1, 1, 8, 64]> x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_25)[name = tensor<string, []>("x2")];
            tensor<fp32, []> const_49_promoted = const()[name = tensor<string, []>("const_49_promoted"), val = tensor<fp32, []>(-0x1p+0)];
            tensor<fp32, [1, 1, 8, 64]> var_1932 = mul(x = x2, y = const_49_promoted)[name = tensor<string, []>("op_1932")];
            tensor<int32, []> var_1934 = const()[name = tensor<string, []>("op_1934"), val = tensor<int32, []>(-1)];
            tensor<bool, []> var_1935_interleave_0 = const()[name = tensor<string, []>("op_1935_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 1, 8, 128]> var_1935 = concat(axis = var_1934, interleave = var_1935_interleave_0, values = (var_1932, x1))[name = tensor<string, []>("op_1935")];
            tensor<fp32, [1, 1, 8, 128]> var_1936 = mul(x = var_1935, y = sin_r_1)[name = tensor<string, []>("op_1936")];
            tensor<fp32, [1, 1, 8, 128]> k = add(x = var_1911, y = var_1936)[name = tensor<string, []>("k")];
            tensor<int32, [4]> var_1943 = const()[name = tensor<string, []>("op_1943"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 1024, 1, 1]> nk = reshape(shape = var_1943, x = k)[name = tensor<string, []>("nk")];
            tensor<int32, [4]> var_1949 = const()[name = tensor<string, []>("op_1949"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
            tensor<fp32, [1, 1024, 1, 1]> nv = reshape(shape = var_1949, x = var_1824)[name = tensor<string, []>("nv")];
            tensor<fp32, [1, 1024, 1, 16]> var_1954 = mul(x = var_1743, y = var_473)[name = tensor<string, []>("op_1954")];
            tensor<fp32, [1, 1024, 1, 16]> var_1955 = mul(x = nk, y = update_mask)[name = tensor<string, []>("op_1955")];
            tensor<fp32, [1, 1024, 1, 16]> lkc = add(x = var_1954, y = var_1955)[name = tensor<string, []>("lkc")];
            tensor<fp32, [1, 1024, 1, 16]> var_1961 = mul(x = var_1763, y = var_473)[name = tensor<string, []>("op_1961")];
            tensor<fp32, [1, 1024, 1, 16]> var_1962 = mul(x = nv, y = update_mask)[name = tensor<string, []>("op_1962")];
            tensor<fp32, [1, 1024, 1, 16]> lvc = add(x = var_1961, y = var_1962)[name = tensor<string, []>("lvc")];
            tensor<int32, [1]> var_1966_axes_0 = const()[name = tensor<string, []>("op_1966_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_1966 = squeeze(axes = var_1966_axes_0, x = lkc)[name = tensor<string, []>("op_1966")];
            tensor<int32, [4]> var_1971 = const()[name = tensor<string, []>("op_1971"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> kc_17 = reshape(shape = var_1971, x = var_1966)[name = tensor<string, []>("kc_17")];
            tensor<int32, [1]> var_1974_axes_0 = const()[name = tensor<string, []>("op_1974_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 1024, 16]> var_1974 = squeeze(axes = var_1974_axes_0, x = lvc)[name = tensor<string, []>("op_1974")];
            tensor<int32, [4]> var_1979 = const()[name = tensor<string, []>("op_1979"), val = tensor<int32, [4]>([1, 8, 128, 16])];
            tensor<fp32, [1, 8, 128, 16]> vc_17 = reshape(shape = var_1979, x = var_1974)[name = tensor<string, []>("vc_17")];
            tensor<int32, [1]> var_1982_axes_0 = const()[name = tensor<string, []>("op_1982_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_1982 = expand_dims(axes = var_1982_axes_0, x = kc_17)[name = tensor<string, []>("op_1982")];
            tensor<int32, [5]> var_1990_reps_0 = const()[name = tensor<string, []>("op_1990_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_1990 = tile(reps = var_1990_reps_0, x = var_1982)[name = tensor<string, []>("op_1990")];
            tensor<int32, [4]> var_1995 = const()[name = tensor<string, []>("op_1995"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> kc = reshape(shape = var_1995, x = var_1990)[name = tensor<string, []>("kc")];
            tensor<int32, [1]> var_1998_axes_0 = const()[name = tensor<string, []>("op_1998_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp32, [1, 8, 1, 128, 16]> var_1998 = expand_dims(axes = var_1998_axes_0, x = vc_17)[name = tensor<string, []>("op_1998")];
            tensor<int32, [5]> var_2006_reps_0 = const()[name = tensor<string, []>("op_2006_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
            tensor<fp32, [1, 8, 2, 128, 16]> var_2006 = tile(reps = var_2006_reps_0, x = var_1998)[name = tensor<string, []>("op_2006")];
            tensor<int32, [4]> var_2011 = const()[name = tensor<string, []>("op_2011"), val = tensor<int32, [4]>([1, 16, 128, 16])];
            tensor<fp32, [1, 16, 128, 16]> vc = reshape(shape = var_2011, x = var_2006)[name = tensor<string, []>("vc")];
            tensor<int32, [4]> var_2015_perm_0 = const()[name = tensor<string, []>("op_2015_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_2016_transpose_x_0 = const()[name = tensor<string, []>("op_2016_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2016_transpose_y_0 = const()[name = tensor<string, []>("op_2016_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 16, 1, 128]> var_2015 = transpose(perm = var_2015_perm_0, x = q)[name = tensor<string, []>("transpose_1")];
            tensor<fp32, [1, 16, 1, 16]> var_2016 = matmul(transpose_x = var_2016_transpose_x_0, transpose_y = var_2016_transpose_y_0, x = var_2015, y = kc)[name = tensor<string, []>("op_2016")];
            tensor<fp32, []> _inversed_aw_33_y_0 = const()[name = tensor<string, []>("_inversed_aw_33_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-4)];
            tensor<fp32, [1, 16, 1, 16]> _inversed_aw_33 = mul(x = var_2016, y = _inversed_aw_33_y_0)[name = tensor<string, []>("_inversed_aw_33")];
            tensor<fp32, [1, 16, 1, 16]> aw_35 = add(x = _inversed_aw_33, y = var_542)[name = tensor<string, []>("aw_35")];
            tensor<int32, []> var_2030 = const()[name = tensor<string, []>("op_2030"), val = tensor<int32, []>(-1)];
            tensor<fp32, [1, 16, 1, 16]> aw = softmax(axis = var_2030, x = aw_35)[name = tensor<string, []>("aw")];
            tensor<bool, []> var_2036_transpose_x_1 = const()[name = tensor<string, []>("op_2036_transpose_x_1"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2036_transpose_y_1 = const()[name = tensor<string, []>("op_2036_transpose_y_1"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, 16, 1, 128]> var_2036 = matmul(transpose_x = var_2036_transpose_x_1, transpose_y = var_2036_transpose_y_1, x = aw, y = vc)[name = tensor<string, []>("op_2036")];
            tensor<int32, [4]> var_2039_perm_0 = const()[name = tensor<string, []>("op_2039_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_2043 = const()[name = tensor<string, []>("op_2043"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp32, [1, 1, 16, 128]> var_2039 = transpose(perm = var_2039_perm_0, x = var_2036)[name = tensor<string, []>("transpose_0")];
            tensor<fp32, [1, 1, 2048]> input_43 = reshape(shape = var_2043, x = var_2039)[name = tensor<string, []>("input_43")];
            tensor<fp32, [1, 1, 1024]> var_2047 = linear(bias = linear_1_bias_0, weight = layers_4_self_attn_o_proj_weight_palettized, x = input_43)[name = tensor<string, []>("linear_31")];
            tensor<int32, [1]> var_2049_axes_0 = const()[name = tensor<string, []>("op_2049_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_2049 = squeeze(axes = var_2049_axes_0, x = var_2047)[name = tensor<string, []>("op_2049")];
            tensor<int32, [1]> var_2051_axes_0 = const()[name = tensor<string, []>("op_2051_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_2051 = squeeze(axes = var_2051_axes_0, x = var_2049)[name = tensor<string, []>("op_2051")];
            tensor<int32, [1]> var_2053_axes_0 = const()[name = tensor<string, []>("op_2053_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_2053 = expand_dims(axes = var_2053_axes_0, x = var_2051)[name = tensor<string, []>("op_2053")];
            tensor<int32, [1]> ao_axes_0 = const()[name = tensor<string, []>("ao_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> ao = expand_dims(axes = ao_axes_0, x = var_2053)[name = tensor<string, []>("ao")];
            tensor<fp32, [1, 1024, 1, 1]> hidden_17 = add(x = hidden_15, y = ao)[name = tensor<string, []>("hidden_17")];
            tensor<int32, [1]> var_2059_axes_0 = const()[name = tensor<string, []>("op_2059_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_2059 = squeeze(axes = var_2059_axes_0, x = hidden_17)[name = tensor<string, []>("op_2059")];
            tensor<int32, [1]> var_2061_axes_0 = const()[name = tensor<string, []>("op_2061_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_2061 = squeeze(axes = var_2061_axes_0, x = var_2059)[name = tensor<string, []>("op_2061")];
            tensor<int32, [1]> hidden_states_77_axes_0 = const()[name = tensor<string, []>("hidden_states_77_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_77 = expand_dims(axes = hidden_states_77_axes_0, x = var_2061)[name = tensor<string, []>("hidden_states_77")];
            tensor<fp32, []> var_2067_promoted = const()[name = tensor<string, []>("op_2067_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_2073 = pow(x = hidden_states_77, y = var_2067_promoted)[name = tensor<string, []>("op_2073")];
            tensor<int32, [1]> variance_39_axes_0 = const()[name = tensor<string, []>("variance_39_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_39_keep_dims_0 = const()[name = tensor<string, []>("variance_39_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, 1, 1]> variance_39 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_2073)[name = tensor<string, []>("variance_39")];
            tensor<fp32, [1024]> const_50 = const()[name = tensor<string, []>("const_50"), val = tensor<fp32, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110226432)))];
            tensor<fp32, [1, 1, 1024]> var_2077 = mul(x = const_50, y = hidden_states_77)[name = tensor<string, []>("op_2077")];
            tensor<fp32, []> var_2078 = const()[name = tensor<string, []>("op_2078"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_2079 = add(x = variance_39, y = var_2078)[name = tensor<string, []>("op_2079")];
            tensor<fp32, []> var_2080_epsilon_0 = const()[name = tensor<string, []>("op_2080_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_2080 = rsqrt(epsilon = var_2080_epsilon_0, x = var_2079)[name = tensor<string, []>("op_2080")];
            tensor<fp32, [1, 1, 1024]> input_45 = mul(x = var_2077, y = var_2080)[name = tensor<string, []>("input_45")];
            tensor<fp32, [1, 1, 3072]> input_47 = linear(bias = linear_4_bias_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_45)[name = tensor<string, []>("linear_32")];
            tensor<fp32, [1, 1, 3072]> var_2088 = silu(x = input_47)[name = tensor<string, []>("op_2088")];
            tensor<fp32, [1, 1, 3072]> var_2090 = linear(bias = linear_4_bias_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_45)[name = tensor<string, []>("linear_33")];
            tensor<fp32, [1, 1, 3072]> input_49 = mul(x = var_2088, y = var_2090)[name = tensor<string, []>("input_49")];
            tensor<fp32, [1, 1, 1024]> var_2093 = linear(bias = linear_1_bias_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_49)[name = tensor<string, []>("linear_34")];
            tensor<int32, [1]> var_2095_axes_0 = const()[name = tensor<string, []>("op_2095_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_2095 = squeeze(axes = var_2095_axes_0, x = var_2093)[name = tensor<string, []>("op_2095")];
            tensor<int32, [1]> var_2097_axes_0 = const()[name = tensor<string, []>("op_2097_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1024]> var_2097 = squeeze(axes = var_2097_axes_0, x = var_2095)[name = tensor<string, []>("op_2097")];
            tensor<int32, [1]> var_2099_axes_0 = const()[name = tensor<string, []>("op_2099_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1]> var_2099 = expand_dims(axes = var_2099_axes_0, x = var_2097)[name = tensor<string, []>("op_2099")];
            tensor<int32, [1]> h_axes_0 = const()[name = tensor<string, []>("h_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1024, 1, 1]> h = expand_dims(axes = h_axes_0, x = var_2099)[name = tensor<string, []>("h")];
            tensor<fp32, [1, 1024, 1, 1]> hidden = add(x = hidden_17, y = h)[name = tensor<string, []>("hidden")];
            tensor<int32, [1]> var_2105_axes_0 = const()[name = tensor<string, []>("op_2105_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_2105 = squeeze(axes = var_2105_axes_0, x = hidden)[name = tensor<string, []>("op_2105")];
            tensor<int32, [1]> var_2107_axes_0 = const()[name = tensor<string, []>("op_2107_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024]> var_2107 = squeeze(axes = var_2107_axes_0, x = var_2105)[name = tensor<string, []>("op_2107")];
            tensor<int32, [1]> hidden_states_81_axes_0 = const()[name = tensor<string, []>("hidden_states_81_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1, 1024]> hidden_states_81 = expand_dims(axes = hidden_states_81_axes_0, x = var_2107)[name = tensor<string, []>("hidden_states_81")];
            tensor<fp32, []> var_2113_promoted = const()[name = tensor<string, []>("op_2113_promoted"), val = tensor<fp32, []>(0x1p+1)];
            tensor<fp32, [1, 1, 1024]> var_2119 = pow(x = hidden_states_81, y = var_2113_promoted)[name = tensor<string, []>("op_2119")];
            tensor<int32, [1]> variance_axes_0 = const()[name = tensor<string, []>("variance_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<bool, []> variance_keep_dims_0 = const()[name = tensor<string, []>("variance_keep_dims_0"), val = tensor<bool, []>(true)];
            tensor<fp32, [1, 1, 1]> variance = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_2119)[name = tensor<string, []>("variance")];
            tensor<fp32, [1024]> const_51 = const()[name = tensor<string, []>("const_51"), val = tensor<fp32, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110230592)))];
            tensor<fp32, [1, 1, 1024]> var_2123 = mul(x = const_51, y = hidden_states_81)[name = tensor<string, []>("op_2123")];
            tensor<fp32, []> var_2124 = const()[name = tensor<string, []>("op_2124"), val = tensor<fp32, []>(0x1.0c6f7ap-20)];
            tensor<fp32, [1, 1, 1]> var_2125 = add(x = variance, y = var_2124)[name = tensor<string, []>("op_2125")];
            tensor<fp32, []> var_2126_epsilon_0 = const()[name = tensor<string, []>("op_2126_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
            tensor<fp32, [1, 1, 1]> var_2126 = rsqrt(epsilon = var_2126_epsilon_0, x = var_2125)[name = tensor<string, []>("op_2126")];
            tensor<fp32, [1, 1, 1024]> input = mul(x = var_2123, y = var_2126)[name = tensor<string, []>("input")];
            tensor<fp32, [1, 1, 2048]> var_2130 = linear(bias = linear_0_bias_0, weight = lm_heads_0_weight_palettized, x = input)[name = tensor<string, []>("linear_35")];
            tensor<int32, [1]> var_2132_axes_0 = const()[name = tensor<string, []>("op_2132_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2132 = squeeze(axes = var_2132_axes_0, x = var_2130)[name = tensor<string, []>("op_2132")];
            tensor<fp32, [1, 1, 2048]> var_2135 = linear(bias = linear_0_bias_0, weight = lm_heads_1_weight_palettized, x = input)[name = tensor<string, []>("linear_36")];
            tensor<int32, [1]> var_2137_axes_0 = const()[name = tensor<string, []>("op_2137_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2137 = squeeze(axes = var_2137_axes_0, x = var_2135)[name = tensor<string, []>("op_2137")];
            tensor<fp32, [1, 1, 2048]> var_2140 = linear(bias = linear_0_bias_0, weight = lm_heads_2_weight_palettized, x = input)[name = tensor<string, []>("linear_37")];
            tensor<int32, [1]> var_2142_axes_0 = const()[name = tensor<string, []>("op_2142_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2142 = squeeze(axes = var_2142_axes_0, x = var_2140)[name = tensor<string, []>("op_2142")];
            tensor<fp32, [1, 1, 2048]> var_2145 = linear(bias = linear_0_bias_0, weight = lm_heads_3_weight_palettized, x = input)[name = tensor<string, []>("linear_38")];
            tensor<int32, [1]> var_2147_axes_0 = const()[name = tensor<string, []>("op_2147_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2147 = squeeze(axes = var_2147_axes_0, x = var_2145)[name = tensor<string, []>("op_2147")];
            tensor<fp32, [1, 1, 2048]> var_2150 = linear(bias = linear_0_bias_0, weight = lm_heads_4_weight_palettized, x = input)[name = tensor<string, []>("linear_39")];
            tensor<int32, [1]> var_2152_axes_0 = const()[name = tensor<string, []>("op_2152_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2152 = squeeze(axes = var_2152_axes_0, x = var_2150)[name = tensor<string, []>("op_2152")];
            tensor<fp32, [1, 1, 2048]> var_2155 = linear(bias = linear_0_bias_0, weight = lm_heads_5_weight_palettized, x = input)[name = tensor<string, []>("linear_40")];
            tensor<int32, [1]> var_2157_axes_0 = const()[name = tensor<string, []>("op_2157_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2157 = squeeze(axes = var_2157_axes_0, x = var_2155)[name = tensor<string, []>("op_2157")];
            tensor<fp32, [1, 1, 2048]> var_2160 = linear(bias = linear_0_bias_0, weight = lm_heads_6_weight_palettized, x = input)[name = tensor<string, []>("linear_41")];
            tensor<int32, [1]> var_2162_axes_0 = const()[name = tensor<string, []>("op_2162_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2162 = squeeze(axes = var_2162_axes_0, x = var_2160)[name = tensor<string, []>("op_2162")];
            tensor<fp32, [1, 1, 2048]> var_2165 = linear(bias = linear_0_bias_0, weight = lm_heads_7_weight_palettized, x = input)[name = tensor<string, []>("linear_42")];
            tensor<int32, [1]> var_2167_axes_0 = const()[name = tensor<string, []>("op_2167_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2167 = squeeze(axes = var_2167_axes_0, x = var_2165)[name = tensor<string, []>("op_2167")];
            tensor<fp32, [1, 1, 2048]> var_2170 = linear(bias = linear_0_bias_0, weight = lm_heads_8_weight_palettized, x = input)[name = tensor<string, []>("linear_43")];
            tensor<int32, [1]> var_2172_axes_0 = const()[name = tensor<string, []>("op_2172_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2172 = squeeze(axes = var_2172_axes_0, x = var_2170)[name = tensor<string, []>("op_2172")];
            tensor<fp32, [1, 1, 2048]> var_2175 = linear(bias = linear_0_bias_0, weight = lm_heads_9_weight_palettized, x = input)[name = tensor<string, []>("linear_44")];
            tensor<int32, [1]> var_2177_axes_0 = const()[name = tensor<string, []>("op_2177_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2177 = squeeze(axes = var_2177_axes_0, x = var_2175)[name = tensor<string, []>("op_2177")];
            tensor<fp32, [1, 1, 2048]> var_2180 = linear(bias = linear_0_bias_0, weight = lm_heads_10_weight_palettized, x = input)[name = tensor<string, []>("linear_45")];
            tensor<int32, [1]> var_2182_axes_0 = const()[name = tensor<string, []>("op_2182_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2182 = squeeze(axes = var_2182_axes_0, x = var_2180)[name = tensor<string, []>("op_2182")];
            tensor<fp32, [1, 1, 2048]> var_2185 = linear(bias = linear_0_bias_0, weight = lm_heads_11_weight_palettized, x = input)[name = tensor<string, []>("linear_46")];
            tensor<int32, [1]> var_2187_axes_0 = const()[name = tensor<string, []>("op_2187_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2187 = squeeze(axes = var_2187_axes_0, x = var_2185)[name = tensor<string, []>("op_2187")];
            tensor<fp32, [1, 1, 2048]> var_2190 = linear(bias = linear_0_bias_0, weight = lm_heads_12_weight_palettized, x = input)[name = tensor<string, []>("linear_47")];
            tensor<int32, [1]> var_2192_axes_0 = const()[name = tensor<string, []>("op_2192_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2192 = squeeze(axes = var_2192_axes_0, x = var_2190)[name = tensor<string, []>("op_2192")];
            tensor<fp32, [1, 1, 2048]> var_2195 = linear(bias = linear_0_bias_0, weight = lm_heads_13_weight_palettized, x = input)[name = tensor<string, []>("linear_48")];
            tensor<int32, [1]> var_2197_axes_0 = const()[name = tensor<string, []>("op_2197_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2197 = squeeze(axes = var_2197_axes_0, x = var_2195)[name = tensor<string, []>("op_2197")];
            tensor<fp32, [1, 1, 2048]> var_2200 = linear(bias = linear_0_bias_0, weight = lm_heads_14_weight_palettized, x = input)[name = tensor<string, []>("linear_49")];
            tensor<int32, [1]> var_2202_axes_0 = const()[name = tensor<string, []>("op_2202_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp32, [1, 2048]> var_2202 = squeeze(axes = var_2202_axes_0, x = var_2200)[name = tensor<string, []>("op_2202")];
            tensor<int32, []> var_2205_axis_0 = const()[name = tensor<string, []>("op_2205_axis_0"), val = tensor<int32, []>(1)];
            tensor<fp32, [1, 15, 2048]> all_logits_type_fp32 = stack(axis = var_2205_axis_0, values = (var_2132, var_2137, var_2142, var_2147, var_2152, var_2157, var_2162, var_2167, var_2172, var_2177, var_2182, var_2187, var_2192, var_2197, var_2202))[name = tensor<string, []>("op_2205")];
            tensor<int32, [1]> var_2207_axes_0 = const()[name = tensor<string, []>("op_2207_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp32, [1, 1024]> var_2207 = squeeze(axes = var_2207_axes_0, x = input)[name = tensor<string, []>("op_2207")];
            tensor<int32, [1]> var_2209_axes_0 = const()[name = tensor<string, []>("op_2209_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1]> var_2209 = expand_dims(axes = var_2209_axes_0, x = var_2207)[name = tensor<string, []>("op_2209")];
            tensor<int32, [1]> var_2211_axes_0 = const()[name = tensor<string, []>("op_2211_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp32, [1, 1024, 1, 1]> hidden_states_type_fp32 = expand_dims(axes = var_2211_axes_0, x = var_2209)[name = tensor<string, []>("op_2211")];
            tensor<int32, []> var_2213 = const()[name = tensor<string, []>("op_2213"), val = tensor<int32, []>(1)];
            tensor<bool, []> new_kv_k_interleave_0 = const()[name = tensor<string, []>("new_kv_k_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 5120, 1, 1]> new_kv_k = concat(axis = var_2213, interleave = new_kv_k_interleave_0, values = (nk_1, nk_3, nk_5, nk_7, nk))[name = tensor<string, []>("new_kv_k")];
            tensor<int32, []> var_2216 = const()[name = tensor<string, []>("op_2216"), val = tensor<int32, []>(1)];
            tensor<bool, []> new_kv_v_interleave_0 = const()[name = tensor<string, []>("new_kv_v_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp32, [1, 5120, 1, 1]> new_kv_v = concat(axis = var_2216, interleave = new_kv_v_interleave_0, values = (nv_1, nv_3, nv_5, nv_7, nv))[name = tensor<string, []>("new_kv_v")];
            tensor<fp32, [1, 5120, 1, 16]> var_2221 = mul(x = cast_1, y = var_473)[name = tensor<string, []>("op_2221")];
            tensor<fp32, [1, 5120, 1, 16]> var_2222 = mul(x = new_kv_k, y = update_mask)[name = tensor<string, []>("op_2222")];
            tensor<fp32, [1, 5120, 1, 16]> new_key_cache_type_fp32 = add(x = var_2221, y = var_2222)[name = tensor<string, []>("op_2224")];
            tensor<fp32, [1, 5120, 1, 16]> var_2228 = mul(x = cast_4, y = var_473)[name = tensor<string, []>("op_2228")];
            tensor<fp32, [1, 5120, 1, 16]> var_2229 = mul(x = new_kv_v, y = update_mask)[name = tensor<string, []>("op_2229")];
            tensor<fp32, [1, 5120, 1, 16]> new_value_cache_type_fp32 = add(x = var_2228, y = var_2229)[name = tensor<string, []>("op_2231")];
            tensor<string, []> cast_65_dtype_0 = const()[name = tensor<string, []>("cast_65_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<string, []> cast_66_dtype_0 = const()[name = tensor<string, []>("cast_66_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<string, []> cast_67_dtype_0 = const()[name = tensor<string, []>("cast_67_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<string, []> cast_68_dtype_0 = const()[name = tensor<string, []>("cast_68_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 15, 2048]> all_logits = cast(dtype = cast_65_dtype_0, x = all_logits_type_fp32)[name = tensor<string, []>("cast_0")];
            tensor<fp16, [1, 1024, 1, 1]> hidden_states = cast(dtype = cast_66_dtype_0, x = hidden_states_type_fp32)[name = tensor<string, []>("cast_1")];
            tensor<fp16, [1, 5120, 1, 16]> new_key_cache = cast(dtype = cast_67_dtype_0, x = new_key_cache_type_fp32)[name = tensor<string, []>("cast_2")];
            tensor<fp16, [1, 5120, 1, 16]> new_value_cache = cast(dtype = cast_68_dtype_0, x = new_value_cache_type_fp32)[name = tensor<string, []>("cast_3")];
        } -> (all_logits, hidden_states, new_key_cache, new_value_cache);
}