Upload folder using huggingface_hub

25f89bc verified 27 days ago

397 kB

	program(1.3)
	[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.11.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
	{
	func main<ios19>(tensor<fp16, [1, 1, 1, 2048]> causal_mask, tensor<fp16, [10, 1024, 3]> conv_state_in, tensor<int32, [1, 1]> input_ids, state<tensor<fp16, [12, 8, 2048, 64]>> kv_cache_0, tensor<int32, [1]> position_ids, tensor<fp16, [1, 1, 2048, 1]> update_mask) {
	tensor<fp16, [64]> layers_14_self_attn_k_layernorm_weight = const()[name = string("layers_14_self_attn_k_layernorm_weight"), val = tensor<fp16, [64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
	tensor<fp16, [64]> layers_14_self_attn_q_layernorm_weight = const()[name = string("layers_14_self_attn_q_layernorm_weight"), val = tensor<fp16, [64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256)))];
	tensor<fp16, [64]> layers_12_self_attn_k_layernorm_weight = const()[name = string("layers_12_self_attn_k_layernorm_weight"), val = tensor<fp16, [64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(448)))];
	tensor<fp16, [64]> layers_12_self_attn_q_layernorm_weight = const()[name = string("layers_12_self_attn_q_layernorm_weight"), val = tensor<fp16, [64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640)))];
	tensor<fp16, [64]> layers_10_self_attn_k_layernorm_weight = const()[name = string("layers_10_self_attn_k_layernorm_weight"), val = tensor<fp16, [64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832)))];
	tensor<fp16, [64]> layers_10_self_attn_q_layernorm_weight = const()[name = string("layers_10_self_attn_q_layernorm_weight"), val = tensor<fp16, [64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1024)))];
	tensor<fp16, [64]> layers_8_self_attn_k_layernorm_weight = const()[name = string("layers_8_self_attn_k_layernorm_weight"), val = tensor<fp16, [64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1216)))];
	tensor<fp16, [64]> layers_8_self_attn_q_layernorm_weight = const()[name = string("layers_8_self_attn_q_layernorm_weight"), val = tensor<fp16, [64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1408)))];
	tensor<fp16, [64]> layers_5_self_attn_k_layernorm_weight = const()[name = string("layers_5_self_attn_k_layernorm_weight"), val = tensor<fp16, [64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1600)))];
	tensor<fp16, [64]> layers_5_self_attn_q_layernorm_weight = const()[name = string("layers_5_self_attn_q_layernorm_weight"), val = tensor<fp16, [64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1792)))];
	tensor<fp16, [64]> layers_2_self_attn_k_layernorm_weight = const()[name = string("layers_2_self_attn_k_layernorm_weight"), val = tensor<fp16, [64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1984)))];
	tensor<fp16, [64]> layers_2_self_attn_q_layernorm_weight = const()[name = string("layers_2_self_attn_q_layernorm_weight"), val = tensor<fp16, [64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2176)))];
	tensor<fp16, [1024]> layers_0_operator_norm_weight = const()[name = string("layers_0_operator_norm_weight"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2368)))];
	tensor<fp16, [4096, 64]> sin_cached = const()[name = string("sin_cached"), val = tensor<fp16, [4096, 64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4480)))];
	tensor<fp16, [4096, 64]> cos_cached = const()[name = string("cos_cached"), val = tensor<fp16, [4096, 64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528832)))];
	tensor<fp16, [3072, 1024, 1, 1]> layers_0_conv_in_proj_weight = const()[name = string("layers_0_conv_in_proj_weight"), val = tensor<fp16, [3072, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1053184)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_0_feed_forward_w1_weight = const()[name = string("layers_0_feed_forward_w1_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7344704)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_0_feed_forward_w3_weight = const()[name = string("layers_0_feed_forward_w3_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16781952)))];
	tensor<fp16, [1024, 4608, 1, 1]> layers_0_feed_forward_w2_weight = const()[name = string("layers_0_feed_forward_w2_weight"), val = tensor<fp16, [1024, 4608, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26219200)))];
	tensor<fp16, [3072, 1024, 1, 1]> layers_1_conv_in_proj_weight = const()[name = string("layers_1_conv_in_proj_weight"), val = tensor<fp16, [3072, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35656448)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_1_feed_forward_w1_weight = const()[name = string("layers_1_feed_forward_w1_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41947968)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_1_feed_forward_w3_weight = const()[name = string("layers_1_feed_forward_w3_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51385216)))];
	tensor<fp16, [1024, 4608, 1, 1]> layers_1_feed_forward_w2_weight = const()[name = string("layers_1_feed_forward_w2_weight"), val = tensor<fp16, [1024, 4608, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60822464)))];
	tensor<fp16, [1024, 1024, 1, 1]> layers_2_self_attn_q_proj_weight = const()[name = string("layers_2_self_attn_q_proj_weight"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70259712)))];
	tensor<fp16, [512, 1024, 1, 1]> layers_2_self_attn_k_proj_weight = const()[name = string("layers_2_self_attn_k_proj_weight"), val = tensor<fp16, [512, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72356928)))];
	tensor<fp16, [512, 1024, 1, 1]> layers_2_self_attn_v_proj_weight = const()[name = string("layers_2_self_attn_v_proj_weight"), val = tensor<fp16, [512, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73405568)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_2_feed_forward_w1_weight = const()[name = string("layers_2_feed_forward_w1_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74454208)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_2_feed_forward_w3_weight = const()[name = string("layers_2_feed_forward_w3_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83891456)))];
	tensor<fp16, [1024, 4608, 1, 1]> layers_2_feed_forward_w2_weight = const()[name = string("layers_2_feed_forward_w2_weight"), val = tensor<fp16, [1024, 4608, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93328704)))];
	tensor<fp16, [3072, 1024, 1, 1]> layers_3_conv_in_proj_weight = const()[name = string("layers_3_conv_in_proj_weight"), val = tensor<fp16, [3072, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102765952)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_3_feed_forward_w1_weight = const()[name = string("layers_3_feed_forward_w1_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109057472)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_3_feed_forward_w3_weight = const()[name = string("layers_3_feed_forward_w3_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118494720)))];
	tensor<fp16, [1024, 4608, 1, 1]> layers_3_feed_forward_w2_weight = const()[name = string("layers_3_feed_forward_w2_weight"), val = tensor<fp16, [1024, 4608, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127931968)))];
	tensor<fp16, [3072, 1024, 1, 1]> layers_4_conv_in_proj_weight = const()[name = string("layers_4_conv_in_proj_weight"), val = tensor<fp16, [3072, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137369216)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_4_feed_forward_w1_weight = const()[name = string("layers_4_feed_forward_w1_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143660736)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_4_feed_forward_w3_weight = const()[name = string("layers_4_feed_forward_w3_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153097984)))];
	tensor<fp16, [1024, 4608, 1, 1]> layers_4_feed_forward_w2_weight = const()[name = string("layers_4_feed_forward_w2_weight"), val = tensor<fp16, [1024, 4608, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162535232)))];
	tensor<fp16, [1024, 1024, 1, 1]> layers_5_self_attn_q_proj_weight = const()[name = string("layers_5_self_attn_q_proj_weight"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171972480)))];
	tensor<fp16, [512, 1024, 1, 1]> layers_5_self_attn_k_proj_weight = const()[name = string("layers_5_self_attn_k_proj_weight"), val = tensor<fp16, [512, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174069696)))];
	tensor<fp16, [512, 1024, 1, 1]> layers_5_self_attn_v_proj_weight = const()[name = string("layers_5_self_attn_v_proj_weight"), val = tensor<fp16, [512, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175118336)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_5_feed_forward_w1_weight = const()[name = string("layers_5_feed_forward_w1_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176166976)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_5_feed_forward_w3_weight = const()[name = string("layers_5_feed_forward_w3_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185604224)))];
	tensor<fp16, [1024, 4608, 1, 1]> layers_5_feed_forward_w2_weight = const()[name = string("layers_5_feed_forward_w2_weight"), val = tensor<fp16, [1024, 4608, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195041472)))];
	tensor<fp16, [3072, 1024, 1, 1]> layers_6_conv_in_proj_weight = const()[name = string("layers_6_conv_in_proj_weight"), val = tensor<fp16, [3072, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204478720)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_6_feed_forward_w1_weight = const()[name = string("layers_6_feed_forward_w1_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210770240)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_6_feed_forward_w3_weight = const()[name = string("layers_6_feed_forward_w3_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220207488)))];
	tensor<fp16, [1024, 4608, 1, 1]> layers_6_feed_forward_w2_weight = const()[name = string("layers_6_feed_forward_w2_weight"), val = tensor<fp16, [1024, 4608, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229644736)))];
	tensor<fp16, [3072, 1024, 1, 1]> layers_7_conv_in_proj_weight = const()[name = string("layers_7_conv_in_proj_weight"), val = tensor<fp16, [3072, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239081984)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_7_feed_forward_w1_weight = const()[name = string("layers_7_feed_forward_w1_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245373504)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_7_feed_forward_w3_weight = const()[name = string("layers_7_feed_forward_w3_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254810752)))];
	tensor<fp16, [1024, 4608, 1, 1]> layers_7_feed_forward_w2_weight = const()[name = string("layers_7_feed_forward_w2_weight"), val = tensor<fp16, [1024, 4608, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264248000)))];
	tensor<fp16, [1024, 1024, 1, 1]> layers_8_self_attn_q_proj_weight = const()[name = string("layers_8_self_attn_q_proj_weight"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273685248)))];
	tensor<fp16, [512, 1024, 1, 1]> layers_8_self_attn_k_proj_weight = const()[name = string("layers_8_self_attn_k_proj_weight"), val = tensor<fp16, [512, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275782464)))];
	tensor<fp16, [512, 1024, 1, 1]> layers_8_self_attn_v_proj_weight = const()[name = string("layers_8_self_attn_v_proj_weight"), val = tensor<fp16, [512, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276831104)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_8_feed_forward_w1_weight = const()[name = string("layers_8_feed_forward_w1_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277879744)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_8_feed_forward_w3_weight = const()[name = string("layers_8_feed_forward_w3_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287316992)))];
	tensor<fp16, [1024, 4608, 1, 1]> layers_8_feed_forward_w2_weight = const()[name = string("layers_8_feed_forward_w2_weight"), val = tensor<fp16, [1024, 4608, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296754240)))];
	tensor<fp16, [3072, 1024, 1, 1]> layers_9_conv_in_proj_weight = const()[name = string("layers_9_conv_in_proj_weight"), val = tensor<fp16, [3072, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306191488)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_9_feed_forward_w1_weight = const()[name = string("layers_9_feed_forward_w1_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312483008)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_9_feed_forward_w3_weight = const()[name = string("layers_9_feed_forward_w3_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321920256)))];
	tensor<fp16, [1024, 4608, 1, 1]> layers_9_feed_forward_w2_weight = const()[name = string("layers_9_feed_forward_w2_weight"), val = tensor<fp16, [1024, 4608, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331357504)))];
	tensor<fp16, [1024, 1024, 1, 1]> layers_10_self_attn_q_proj_weight = const()[name = string("layers_10_self_attn_q_proj_weight"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340794752)))];
	tensor<fp16, [512, 1024, 1, 1]> layers_10_self_attn_k_proj_weight = const()[name = string("layers_10_self_attn_k_proj_weight"), val = tensor<fp16, [512, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342891968)))];
	tensor<fp16, [512, 1024, 1, 1]> layers_10_self_attn_v_proj_weight = const()[name = string("layers_10_self_attn_v_proj_weight"), val = tensor<fp16, [512, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343940608)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_10_feed_forward_w1_weight = const()[name = string("layers_10_feed_forward_w1_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344989248)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_10_feed_forward_w3_weight = const()[name = string("layers_10_feed_forward_w3_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354426496)))];
	tensor<fp16, [1024, 4608, 1, 1]> layers_10_feed_forward_w2_weight = const()[name = string("layers_10_feed_forward_w2_weight"), val = tensor<fp16, [1024, 4608, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363863744)))];
	tensor<fp16, [3072, 1024, 1, 1]> layers_11_conv_in_proj_weight = const()[name = string("layers_11_conv_in_proj_weight"), val = tensor<fp16, [3072, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373300992)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_11_feed_forward_w1_weight = const()[name = string("layers_11_feed_forward_w1_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379592512)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_11_feed_forward_w3_weight = const()[name = string("layers_11_feed_forward_w3_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389029760)))];
	tensor<fp16, [1024, 4608, 1, 1]> layers_11_feed_forward_w2_weight = const()[name = string("layers_11_feed_forward_w2_weight"), val = tensor<fp16, [1024, 4608, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398467008)))];
	tensor<fp16, [1024, 1024, 1, 1]> layers_12_self_attn_q_proj_weight = const()[name = string("layers_12_self_attn_q_proj_weight"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407904256)))];
	tensor<fp16, [512, 1024, 1, 1]> layers_12_self_attn_k_proj_weight = const()[name = string("layers_12_self_attn_k_proj_weight"), val = tensor<fp16, [512, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410001472)))];
	tensor<fp16, [512, 1024, 1, 1]> layers_12_self_attn_v_proj_weight = const()[name = string("layers_12_self_attn_v_proj_weight"), val = tensor<fp16, [512, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411050112)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_12_feed_forward_w1_weight = const()[name = string("layers_12_feed_forward_w1_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412098752)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_12_feed_forward_w3_weight = const()[name = string("layers_12_feed_forward_w3_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421536000)))];
	tensor<fp16, [1024, 4608, 1, 1]> layers_12_feed_forward_w2_weight = const()[name = string("layers_12_feed_forward_w2_weight"), val = tensor<fp16, [1024, 4608, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430973248)))];
	tensor<fp16, [3072, 1024, 1, 1]> layers_13_conv_in_proj_weight = const()[name = string("layers_13_conv_in_proj_weight"), val = tensor<fp16, [3072, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440410496)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_13_feed_forward_w1_weight = const()[name = string("layers_13_feed_forward_w1_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446702016)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_13_feed_forward_w3_weight = const()[name = string("layers_13_feed_forward_w3_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456139264)))];
	tensor<fp16, [1024, 4608, 1, 1]> layers_13_feed_forward_w2_weight = const()[name = string("layers_13_feed_forward_w2_weight"), val = tensor<fp16, [1024, 4608, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465576512)))];
	tensor<fp16, [1024, 1024, 1, 1]> layers_14_self_attn_q_proj_weight = const()[name = string("layers_14_self_attn_q_proj_weight"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475013760)))];
	tensor<fp16, [512, 1024, 1, 1]> layers_14_self_attn_k_proj_weight = const()[name = string("layers_14_self_attn_k_proj_weight"), val = tensor<fp16, [512, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477110976)))];
	tensor<fp16, [512, 1024, 1, 1]> layers_14_self_attn_v_proj_weight = const()[name = string("layers_14_self_attn_v_proj_weight"), val = tensor<fp16, [512, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478159616)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_14_feed_forward_w1_weight = const()[name = string("layers_14_feed_forward_w1_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479208256)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_14_feed_forward_w3_weight = const()[name = string("layers_14_feed_forward_w3_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488645504)))];
	tensor<fp16, [1024, 4608, 1, 1]> layers_14_feed_forward_w2_weight = const()[name = string("layers_14_feed_forward_w2_weight"), val = tensor<fp16, [1024, 4608, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(498082752)))];
	tensor<fp16, [3072, 1024, 1, 1]> layers_15_conv_in_proj_weight = const()[name = string("layers_15_conv_in_proj_weight"), val = tensor<fp16, [3072, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(507520000)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_15_feed_forward_w1_weight = const()[name = string("layers_15_feed_forward_w1_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513811520)))];
	tensor<fp16, [4608, 1024, 1, 1]> layers_15_feed_forward_w3_weight = const()[name = string("layers_15_feed_forward_w3_weight"), val = tensor<fp16, [4608, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523248768)))];
	tensor<fp16, [1024, 4608, 1, 1]> layers_15_feed_forward_w2_weight = const()[name = string("layers_15_feed_forward_w2_weight"), val = tensor<fp16, [1024, 4608, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532686016)))];
	tensor<fp16, [65536, 1024, 1, 1]> lm_head_weight = const()[name = string("lm_head_weight"), val = tensor<fp16, [65536, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542123264)))];
	int32 var_612_batch_dims_0 = const()[name = string("op_612_batch_dims_0"), val = int32(0)];
	bool var_612_validate_indices_0 = const()[name = string("op_612_validate_indices_0"), val = bool(false)];
	tensor<fp16, [65536, 1024]> embed_tokens_weight_to_fp16 = const()[name = string("embed_tokens_weight_to_fp16"), val = tensor<fp16, [65536, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676341056)))];
	int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
	tensor<bool, [1, 1]> greater_equal_0 = greater_equal(x = input_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
	int32 slice_by_index_12 = const()[name = string("slice_by_index_12"), val = int32(65536)];
	tensor<int32, [1, 1]> add_0 = add(x = input_ids, y = slice_by_index_12)[name = string("add_0")];
	tensor<int32, [1, 1]> select_0 = select(a = input_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")];
	int32 var_612_cast_fp16_axis_0 = const()[name = string("op_612_cast_fp16_axis_0"), val = int32(0)];
	tensor<fp16, [1, 1, 1024]> var_612_cast_fp16 = gather(axis = var_612_cast_fp16_axis_0, batch_dims = var_612_batch_dims_0, indices = select_0, validate_indices = var_612_validate_indices_0, x = embed_tokens_weight_to_fp16)[name = string("op_612_cast_fp16")];
	int32 var_618 = const()[name = string("op_618"), val = int32(0)];
	int32 var_619_batch_dims_0 = const()[name = string("op_619_batch_dims_0"), val = int32(0)];
	bool var_619_validate_indices_0 = const()[name = string("op_619_validate_indices_0"), val = bool(false)];
	string position_ids_to_uint16_dtype_0 = const()[name = string("position_ids_to_uint16_dtype_0"), val = string("uint16")];
	tensor<uint16, [1]> position_ids_to_uint16 = cast(dtype = position_ids_to_uint16_dtype_0, x = position_ids)[name = string("cast_44")];
	tensor<fp16, [1, 64]> var_619_cast_uint16 = gather(axis = var_618, batch_dims = var_619_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_619_validate_indices_0, x = cos_cached)[name = string("op_619_cast_uint16")];
	tensor<int32, [4]> var_624 = const()[name = string("op_624"), val = tensor<int32, [4]>([1, 1, 1, 64])];
	tensor<fp16, [1, 1, 1, 64]> cos = reshape(shape = var_624, x = var_619_cast_uint16)[name = string("cos")];
	int32 var_626 = const()[name = string("op_626"), val = int32(0)];
	int32 var_627_batch_dims_0 = const()[name = string("op_627_batch_dims_0"), val = int32(0)];
	bool var_627_validate_indices_0 = const()[name = string("op_627_validate_indices_0"), val = bool(false)];
	tensor<fp16, [1, 64]> var_627_cast_uint16 = gather(axis = var_626, batch_dims = var_627_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_627_validate_indices_0, x = sin_cached)[name = string("op_627_cast_uint16")];
	tensor<int32, [4]> var_632 = const()[name = string("op_632"), val = tensor<int32, [4]>([1, 1, 1, 64])];
	tensor<fp16, [1, 1, 1, 64]> sin = reshape(shape = var_632, x = var_627_cast_uint16)[name = string("sin")];
	fp16 const_0_promoted = const()[name = string("const_0_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_634 = mul(x = var_612_cast_fp16, y = const_0_promoted)[name = string("op_634")];
	int32 var_636 = const()[name = string("op_636"), val = int32(-1)];
	bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_1 = concat(axis = var_636, interleave = input_1_interleave_0, values = (var_612_cast_fp16, var_634))[name = string("input_1")];
	tensor<int32, [1]> normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_642_to_fp16 = const()[name = string("op_642_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_642_to_fp16, x = input_1)[name = string("normed_1_cast_fp16")];
	tensor<int32, [2]> var_645_split_sizes_0 = const()[name = string("op_645_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_645_axis_0 = const()[name = string("op_645_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_645_0, tensor<fp16, [1, 1, 1024]> var_645_1 = split(axis = var_645_axis_0, split_sizes = var_645_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_645")];
	tensor<fp16, [1, 1, 1024]> hidden_states_1 = mul(x = var_645_0, y = layers_0_operator_norm_weight)[name = string("hidden_states_1")];
	tensor<int32, [3]> var_651 = const()[name = string("op_651"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_654_axes_0 = const()[name = string("op_654_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_652 = transpose(perm = var_651, x = hidden_states_1)[name = string("transpose_161")];
	tensor<fp16, [1, 1024, 1, 1]> var_654 = expand_dims(axes = var_654_axes_0, x = var_652)[name = string("op_654")];
	string BCx_1_pad_type_0 = const()[name = string("BCx_1_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> BCx_1_strides_0 = const()[name = string("BCx_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> BCx_1_pad_0 = const()[name = string("BCx_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> BCx_1_dilations_0 = const()[name = string("BCx_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 BCx_1_groups_0 = const()[name = string("BCx_1_groups_0"), val = int32(1)];
	tensor<fp16, [1, 3072, 1, 1]> BCx_1 = conv(dilations = BCx_1_dilations_0, groups = BCx_1_groups_0, pad = BCx_1_pad_0, pad_type = BCx_1_pad_type_0, strides = BCx_1_strides_0, weight = layers_0_conv_in_proj_weight, x = var_654)[name = string("BCx_1")];
	tensor<int32, [3]> var_671_split_sizes_0 = const()[name = string("op_671_split_sizes_0"), val = tensor<int32, [3]>([1024, 1024, 1024])];
	int32 var_671_axis_0 = const()[name = string("op_671_axis_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> var_671_0, tensor<fp16, [1, 1024, 1, 1]> var_671_1, tensor<fp16, [1, 1024, 1, 1]> var_671_2 = split(axis = var_671_axis_0, split_sizes = var_671_split_sizes_0, x = BCx_1)[name = string("op_671")];
	tensor<fp16, [1, 1024, 1, 1]> Bx_1 = mul(x = var_671_0, y = var_671_2)[name = string("Bx_1")];
	tensor<int32, [3]> var_677_begin_0 = const()[name = string("op_677_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
	tensor<int32, [3]> var_677_end_0 = const()[name = string("op_677_end_0"), val = tensor<int32, [3]>([1, 1024, 3])];
	tensor<bool, [3]> var_677_end_mask_0 = const()[name = string("op_677_end_mask_0"), val = tensor<bool, [3]>([false, true, true])];
	tensor<bool, [3]> var_677_squeeze_mask_0 = const()[name = string("op_677_squeeze_mask_0"), val = tensor<bool, [3]>([true, false, false])];
	tensor<fp16, [1024, 3]> var_677_cast_fp16 = slice_by_index(begin = var_677_begin_0, end = var_677_end_0, end_mask = var_677_end_mask_0, squeeze_mask = var_677_squeeze_mask_0, x = conv_state_in)[name = string("op_677_cast_fp16")];
	tensor<int32, [1]> var_679_axes_0 = const()[name = string("op_679_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1024, 3]> var_679_cast_fp16 = expand_dims(axes = var_679_axes_0, x = var_677_cast_fp16)[name = string("op_679_cast_fp16")];
	tensor<int32, [1]> slot_1_axes_0 = const()[name = string("slot_1_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1, 3]> slot_1_cast_fp16 = expand_dims(axes = slot_1_axes_0, x = var_679_cast_fp16)[name = string("slot_1_cast_fp16")];
	tensor<int32, [4]> live_tail_1_begin_0 = const()[name = string("live_tail_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1])];
	tensor<int32, [4]> live_tail_1_end_0 = const()[name = string("live_tail_1_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
	tensor<bool, [4]> live_tail_1_end_mask_0 = const()[name = string("live_tail_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
	tensor<fp16, [1, 1024, 1, 2]> live_tail_1_cast_fp16 = slice_by_index(begin = live_tail_1_begin_0, end = live_tail_1_end_0, end_mask = live_tail_1_end_mask_0, x = slot_1_cast_fp16)[name = string("live_tail_1_cast_fp16")];
	int32 var_688 = const()[name = string("op_688"), val = int32(-1)];
	bool new_state_1_interleave_0 = const()[name = string("new_state_1_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1024, 1, 3]> new_state_1_cast_fp16 = concat(axis = var_688, interleave = new_state_1_interleave_0, values = (live_tail_1_cast_fp16, Bx_1))[name = string("new_state_1_cast_fp16")];
	tensor<int32, [1]> var_691_axes_0 = const()[name = string("op_691_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1024, 1, 3]> var_691_cast_fp16 = squeeze(axes = var_691_axes_0, x = new_state_1_cast_fp16)[name = string("op_691_cast_fp16")];
	tensor<int32, [1]> var_693_axes_0 = const()[name = string("op_693_axes_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1024, 3]> var_693_cast_fp16 = squeeze(axes = var_693_axes_0, x = var_691_cast_fp16)[name = string("op_693_cast_fp16")];
	string conv_out_1_pad_type_0 = const()[name = string("conv_out_1_pad_type_0"), val = string("valid")];
	int32 conv_out_1_groups_0 = const()[name = string("conv_out_1_groups_0"), val = int32(1024)];
	tensor<int32, [2]> conv_out_1_strides_0 = const()[name = string("conv_out_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> conv_out_1_pad_0 = const()[name = string("conv_out_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> conv_out_1_dilations_0 = const()[name = string("conv_out_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<fp16, [1024, 1, 1, 3]> layers_0_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_0_conv_conv_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(810558848)))];
	tensor<fp16, [1, 1024, 1, 1]> conv_out_1_cast_fp16 = conv(dilations = conv_out_1_dilations_0, groups = conv_out_1_groups_0, pad = conv_out_1_pad_0, pad_type = conv_out_1_pad_type_0, strides = conv_out_1_strides_0, weight = layers_0_conv_conv_weight_promoted_to_fp16, x = new_state_1_cast_fp16)[name = string("conv_out_1_cast_fp16")];
	tensor<fp16, [1, 1024, 1, 1]> input_5_cast_fp16 = mul(x = var_671_1, y = conv_out_1_cast_fp16)[name = string("input_5_cast_fp16")];
	string y_1_pad_type_0 = const()[name = string("y_1_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> y_1_strides_0 = const()[name = string("y_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> y_1_pad_0 = const()[name = string("y_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> y_1_dilations_0 = const()[name = string("y_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 y_1_groups_0 = const()[name = string("y_1_groups_0"), val = int32(1)];
	tensor<fp16, [1024, 1024, 1, 1]> layers_0_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_0_conv_out_proj_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(810565056)))];
	tensor<fp16, [1, 1024, 1, 1]> y_1_cast_fp16 = conv(dilations = y_1_dilations_0, groups = y_1_groups_0, pad = y_1_pad_0, pad_type = y_1_pad_type_0, strides = y_1_strides_0, weight = layers_0_conv_out_proj_weight_promoted_to_fp16, x = input_5_cast_fp16)[name = string("y_1_cast_fp16")];
	tensor<int32, [1]> var_719_axes_0 = const()[name = string("op_719_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_719_cast_fp16 = squeeze(axes = var_719_axes_0, x = y_1_cast_fp16)[name = string("op_719_cast_fp16")];
	tensor<int32, [3]> var_723 = const()[name = string("op_723"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> op_out_1_cast_fp16 = transpose(perm = var_723, x = var_719_cast_fp16)[name = string("transpose_160")];
	tensor<fp16, [1, 1, 1024]> x_3_cast_fp16 = add(x = var_612_cast_fp16, y = op_out_1_cast_fp16)[name = string("x_3_cast_fp16")];
	fp16 const_1_promoted_to_fp16 = const()[name = string("const_1_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_727_cast_fp16 = mul(x = x_3_cast_fp16, y = const_1_promoted_to_fp16)[name = string("op_727_cast_fp16")];
	int32 var_729 = const()[name = string("op_729"), val = int32(-1)];
	bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_7_cast_fp16 = concat(axis = var_729, interleave = input_7_interleave_0, values = (x_3_cast_fp16, var_727_cast_fp16))[name = string("input_7_cast_fp16")];
	tensor<int32, [1]> normed_3_axes_0 = const()[name = string("normed_3_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_735_to_fp16 = const()[name = string("op_735_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_3_cast_fp16 = layer_norm(axes = normed_3_axes_0, epsilon = var_735_to_fp16, x = input_7_cast_fp16)[name = string("normed_3_cast_fp16")];
	tensor<int32, [2]> var_738_split_sizes_0 = const()[name = string("op_738_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_738_axis_0 = const()[name = string("op_738_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_738_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_738_cast_fp16_1 = split(axis = var_738_axis_0, split_sizes = var_738_split_sizes_0, x = normed_3_cast_fp16)[name = string("op_738_cast_fp16")];
	tensor<fp16, [1024]> layers_0_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_0_ffn_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(812662272)))];
	tensor<fp16, [1, 1, 1024]> normed_5_cast_fp16 = mul(x = var_738_cast_fp16_0, y = layers_0_ffn_norm_weight_promoted_to_fp16)[name = string("normed_5_cast_fp16")];
	tensor<int32, [3]> var_744 = const()[name = string("op_744"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_747_axes_0 = const()[name = string("op_747_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_745_cast_fp16 = transpose(perm = var_744, x = normed_5_cast_fp16)[name = string("transpose_159")];
	tensor<fp16, [1, 1024, 1, 1]> var_747_cast_fp16 = expand_dims(axes = var_747_axes_0, x = var_745_cast_fp16)[name = string("op_747_cast_fp16")];
	string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = layers_0_feed_forward_w1_weight, x = var_747_cast_fp16)[name = string("input_11")];
	string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> b_1 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = layers_0_feed_forward_w3_weight, x = var_747_cast_fp16)[name = string("b_1")];
	tensor<fp16, [1, 4608, 1, 1]> var_775 = silu(x = input_11)[name = string("op_775")];
	tensor<fp16, [1, 4608, 1, 1]> input_13 = mul(x = var_775, y = b_1)[name = string("input_13")];
	string mlp_1_pad_type_0 = const()[name = string("mlp_1_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_1_strides_0 = const()[name = string("mlp_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_1_pad_0 = const()[name = string("mlp_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_1_dilations_0 = const()[name = string("mlp_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_1_groups_0 = const()[name = string("mlp_1_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> mlp_1 = conv(dilations = mlp_1_dilations_0, groups = mlp_1_groups_0, pad = mlp_1_pad_0, pad_type = mlp_1_pad_type_0, strides = mlp_1_strides_0, weight = layers_0_feed_forward_w2_weight, x = input_13)[name = string("mlp_1")];
	tensor<int32, [1]> var_789_axes_0 = const()[name = string("op_789_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_789 = squeeze(axes = var_789_axes_0, x = mlp_1)[name = string("op_789")];
	tensor<int32, [3]> var_793 = const()[name = string("op_793"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> mlp_3 = transpose(perm = var_793, x = var_789)[name = string("transpose_158")];
	tensor<fp16, [1, 1, 1024]> x_5_cast_fp16 = add(x = x_3_cast_fp16, y = mlp_3)[name = string("x_5_cast_fp16")];
	fp16 const_2_promoted_to_fp16 = const()[name = string("const_2_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_797_cast_fp16 = mul(x = x_5_cast_fp16, y = const_2_promoted_to_fp16)[name = string("op_797_cast_fp16")];
	int32 var_799 = const()[name = string("op_799"), val = int32(-1)];
	bool input_15_interleave_0 = const()[name = string("input_15_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_15_cast_fp16 = concat(axis = var_799, interleave = input_15_interleave_0, values = (x_5_cast_fp16, var_797_cast_fp16))[name = string("input_15_cast_fp16")];
	tensor<int32, [1]> normed_7_axes_0 = const()[name = string("normed_7_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_805_to_fp16 = const()[name = string("op_805_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_7_cast_fp16 = layer_norm(axes = normed_7_axes_0, epsilon = var_805_to_fp16, x = input_15_cast_fp16)[name = string("normed_7_cast_fp16")];
	tensor<int32, [2]> var_808_split_sizes_0 = const()[name = string("op_808_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_808_axis_0 = const()[name = string("op_808_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_808_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_808_cast_fp16_1 = split(axis = var_808_axis_0, split_sizes = var_808_split_sizes_0, x = normed_7_cast_fp16)[name = string("op_808_cast_fp16")];
	tensor<fp16, [1024]> layers_1_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_1_operator_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(812664384)))];
	tensor<fp16, [1, 1, 1024]> hidden_states_3_cast_fp16 = mul(x = var_808_cast_fp16_0, y = layers_1_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_3_cast_fp16")];
	tensor<int32, [3]> var_814 = const()[name = string("op_814"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_817_axes_0 = const()[name = string("op_817_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_815_cast_fp16 = transpose(perm = var_814, x = hidden_states_3_cast_fp16)[name = string("transpose_157")];
	tensor<fp16, [1, 1024, 1, 1]> var_817_cast_fp16 = expand_dims(axes = var_817_axes_0, x = var_815_cast_fp16)[name = string("op_817_cast_fp16")];
	string BCx_3_pad_type_0 = const()[name = string("BCx_3_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> BCx_3_strides_0 = const()[name = string("BCx_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> BCx_3_pad_0 = const()[name = string("BCx_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> BCx_3_dilations_0 = const()[name = string("BCx_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 BCx_3_groups_0 = const()[name = string("BCx_3_groups_0"), val = int32(1)];
	tensor<fp16, [1, 3072, 1, 1]> BCx_3 = conv(dilations = BCx_3_dilations_0, groups = BCx_3_groups_0, pad = BCx_3_pad_0, pad_type = BCx_3_pad_type_0, strides = BCx_3_strides_0, weight = layers_1_conv_in_proj_weight, x = var_817_cast_fp16)[name = string("BCx_3")];
	tensor<int32, [3]> var_834_split_sizes_0 = const()[name = string("op_834_split_sizes_0"), val = tensor<int32, [3]>([1024, 1024, 1024])];
	int32 var_834_axis_0 = const()[name = string("op_834_axis_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> var_834_0, tensor<fp16, [1, 1024, 1, 1]> var_834_1, tensor<fp16, [1, 1024, 1, 1]> var_834_2 = split(axis = var_834_axis_0, split_sizes = var_834_split_sizes_0, x = BCx_3)[name = string("op_834")];
	tensor<fp16, [1, 1024, 1, 1]> Bx_3 = mul(x = var_834_0, y = var_834_2)[name = string("Bx_3")];
	tensor<int32, [3]> var_840_begin_0 = const()[name = string("op_840_begin_0"), val = tensor<int32, [3]>([1, 0, 0])];
	tensor<int32, [3]> var_840_end_0 = const()[name = string("op_840_end_0"), val = tensor<int32, [3]>([2, 1024, 3])];
	tensor<bool, [3]> var_840_end_mask_0 = const()[name = string("op_840_end_mask_0"), val = tensor<bool, [3]>([false, true, true])];
	tensor<bool, [3]> var_840_squeeze_mask_0 = const()[name = string("op_840_squeeze_mask_0"), val = tensor<bool, [3]>([true, false, false])];
	tensor<fp16, [1024, 3]> var_840_cast_fp16 = slice_by_index(begin = var_840_begin_0, end = var_840_end_0, end_mask = var_840_end_mask_0, squeeze_mask = var_840_squeeze_mask_0, x = conv_state_in)[name = string("op_840_cast_fp16")];
	tensor<int32, [1]> var_842_axes_0 = const()[name = string("op_842_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1024, 3]> var_842_cast_fp16 = expand_dims(axes = var_842_axes_0, x = var_840_cast_fp16)[name = string("op_842_cast_fp16")];
	tensor<int32, [1]> slot_3_axes_0 = const()[name = string("slot_3_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1, 3]> slot_3_cast_fp16 = expand_dims(axes = slot_3_axes_0, x = var_842_cast_fp16)[name = string("slot_3_cast_fp16")];
	tensor<int32, [4]> live_tail_3_begin_0 = const()[name = string("live_tail_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1])];
	tensor<int32, [4]> live_tail_3_end_0 = const()[name = string("live_tail_3_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
	tensor<bool, [4]> live_tail_3_end_mask_0 = const()[name = string("live_tail_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
	tensor<fp16, [1, 1024, 1, 2]> live_tail_3_cast_fp16 = slice_by_index(begin = live_tail_3_begin_0, end = live_tail_3_end_0, end_mask = live_tail_3_end_mask_0, x = slot_3_cast_fp16)[name = string("live_tail_3_cast_fp16")];
	int32 var_851 = const()[name = string("op_851"), val = int32(-1)];
	bool new_state_3_interleave_0 = const()[name = string("new_state_3_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1024, 1, 3]> new_state_3_cast_fp16 = concat(axis = var_851, interleave = new_state_3_interleave_0, values = (live_tail_3_cast_fp16, Bx_3))[name = string("new_state_3_cast_fp16")];
	tensor<int32, [1]> var_854_axes_0 = const()[name = string("op_854_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1024, 1, 3]> var_854_cast_fp16 = squeeze(axes = var_854_axes_0, x = new_state_3_cast_fp16)[name = string("op_854_cast_fp16")];
	tensor<int32, [1]> var_856_axes_0 = const()[name = string("op_856_axes_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1024, 3]> var_856_cast_fp16 = squeeze(axes = var_856_axes_0, x = var_854_cast_fp16)[name = string("op_856_cast_fp16")];
	string conv_out_3_pad_type_0 = const()[name = string("conv_out_3_pad_type_0"), val = string("valid")];
	int32 conv_out_3_groups_0 = const()[name = string("conv_out_3_groups_0"), val = int32(1024)];
	tensor<int32, [2]> conv_out_3_strides_0 = const()[name = string("conv_out_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> conv_out_3_pad_0 = const()[name = string("conv_out_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> conv_out_3_dilations_0 = const()[name = string("conv_out_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<fp16, [1024, 1, 1, 3]> layers_1_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_1_conv_conv_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(812666496)))];
	tensor<fp16, [1, 1024, 1, 1]> conv_out_3_cast_fp16 = conv(dilations = conv_out_3_dilations_0, groups = conv_out_3_groups_0, pad = conv_out_3_pad_0, pad_type = conv_out_3_pad_type_0, strides = conv_out_3_strides_0, weight = layers_1_conv_conv_weight_promoted_to_fp16, x = new_state_3_cast_fp16)[name = string("conv_out_3_cast_fp16")];
	tensor<fp16, [1, 1024, 1, 1]> input_19_cast_fp16 = mul(x = var_834_1, y = conv_out_3_cast_fp16)[name = string("input_19_cast_fp16")];
	string y_3_pad_type_0 = const()[name = string("y_3_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> y_3_strides_0 = const()[name = string("y_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> y_3_pad_0 = const()[name = string("y_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> y_3_dilations_0 = const()[name = string("y_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 y_3_groups_0 = const()[name = string("y_3_groups_0"), val = int32(1)];
	tensor<fp16, [1024, 1024, 1, 1]> layers_1_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_1_conv_out_proj_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(812672704)))];
	tensor<fp16, [1, 1024, 1, 1]> y_3_cast_fp16 = conv(dilations = y_3_dilations_0, groups = y_3_groups_0, pad = y_3_pad_0, pad_type = y_3_pad_type_0, strides = y_3_strides_0, weight = layers_1_conv_out_proj_weight_promoted_to_fp16, x = input_19_cast_fp16)[name = string("y_3_cast_fp16")];
	tensor<int32, [1]> var_882_axes_0 = const()[name = string("op_882_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_882_cast_fp16 = squeeze(axes = var_882_axes_0, x = y_3_cast_fp16)[name = string("op_882_cast_fp16")];
	tensor<int32, [3]> var_886 = const()[name = string("op_886"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> op_out_3_cast_fp16 = transpose(perm = var_886, x = var_882_cast_fp16)[name = string("transpose_156")];
	tensor<fp16, [1, 1, 1024]> x_7_cast_fp16 = add(x = x_5_cast_fp16, y = op_out_3_cast_fp16)[name = string("x_7_cast_fp16")];
	fp16 const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_890_cast_fp16 = mul(x = x_7_cast_fp16, y = const_3_promoted_to_fp16)[name = string("op_890_cast_fp16")];
	int32 var_892 = const()[name = string("op_892"), val = int32(-1)];
	bool input_21_interleave_0 = const()[name = string("input_21_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_21_cast_fp16 = concat(axis = var_892, interleave = input_21_interleave_0, values = (x_7_cast_fp16, var_890_cast_fp16))[name = string("input_21_cast_fp16")];
	tensor<int32, [1]> normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_898_to_fp16 = const()[name = string("op_898_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_898_to_fp16, x = input_21_cast_fp16)[name = string("normed_9_cast_fp16")];
	tensor<int32, [2]> var_901_split_sizes_0 = const()[name = string("op_901_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_901_axis_0 = const()[name = string("op_901_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_901_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_901_cast_fp16_1 = split(axis = var_901_axis_0, split_sizes = var_901_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_901_cast_fp16")];
	tensor<fp16, [1024]> layers_1_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_1_ffn_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814769920)))];
	tensor<fp16, [1, 1, 1024]> normed_11_cast_fp16 = mul(x = var_901_cast_fp16_0, y = layers_1_ffn_norm_weight_promoted_to_fp16)[name = string("normed_11_cast_fp16")];
	tensor<int32, [3]> var_907 = const()[name = string("op_907"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_910_axes_0 = const()[name = string("op_910_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_908_cast_fp16 = transpose(perm = var_907, x = normed_11_cast_fp16)[name = string("transpose_155")];
	tensor<fp16, [1, 1024, 1, 1]> var_910_cast_fp16 = expand_dims(axes = var_910_axes_0, x = var_908_cast_fp16)[name = string("op_910_cast_fp16")];
	string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = layers_1_feed_forward_w1_weight, x = var_910_cast_fp16)[name = string("input_25")];
	string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> b_3 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = layers_1_feed_forward_w3_weight, x = var_910_cast_fp16)[name = string("b_3")];
	tensor<fp16, [1, 4608, 1, 1]> var_938 = silu(x = input_25)[name = string("op_938")];
	tensor<fp16, [1, 4608, 1, 1]> input_27 = mul(x = var_938, y = b_3)[name = string("input_27")];
	string mlp_5_pad_type_0 = const()[name = string("mlp_5_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_5_strides_0 = const()[name = string("mlp_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_5_pad_0 = const()[name = string("mlp_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_5_dilations_0 = const()[name = string("mlp_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_5_groups_0 = const()[name = string("mlp_5_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> mlp_5 = conv(dilations = mlp_5_dilations_0, groups = mlp_5_groups_0, pad = mlp_5_pad_0, pad_type = mlp_5_pad_type_0, strides = mlp_5_strides_0, weight = layers_1_feed_forward_w2_weight, x = input_27)[name = string("mlp_5")];
	tensor<int32, [1]> var_952_axes_0 = const()[name = string("op_952_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_952 = squeeze(axes = var_952_axes_0, x = mlp_5)[name = string("op_952")];
	tensor<int32, [3]> var_956 = const()[name = string("op_956"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> mlp_7 = transpose(perm = var_956, x = var_952)[name = string("transpose_154")];
	tensor<fp16, [1, 1, 1024]> x_9_cast_fp16 = add(x = x_7_cast_fp16, y = mlp_7)[name = string("x_9_cast_fp16")];
	fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_960_cast_fp16 = mul(x = x_9_cast_fp16, y = const_4_promoted_to_fp16)[name = string("op_960_cast_fp16")];
	int32 var_962 = const()[name = string("op_962"), val = int32(-1)];
	bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_29_cast_fp16 = concat(axis = var_962, interleave = input_29_interleave_0, values = (x_9_cast_fp16, var_960_cast_fp16))[name = string("input_29_cast_fp16")];
	tensor<int32, [1]> normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_968_to_fp16 = const()[name = string("op_968_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_968_to_fp16, x = input_29_cast_fp16)[name = string("normed_13_cast_fp16")];
	tensor<int32, [2]> var_971_split_sizes_0 = const()[name = string("op_971_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_971_axis_0 = const()[name = string("op_971_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_971_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_971_cast_fp16_1 = split(axis = var_971_axis_0, split_sizes = var_971_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_971_cast_fp16")];
	tensor<fp16, [1024]> layers_2_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_2_operator_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814772032)))];
	tensor<fp16, [1, 1, 1024]> hidden_states_5_cast_fp16 = mul(x = var_971_cast_fp16_0, y = layers_2_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_5_cast_fp16")];
	tensor<int32, [3]> var_977 = const()[name = string("op_977"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_980_axes_0 = const()[name = string("op_980_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_978_cast_fp16 = transpose(perm = var_977, x = hidden_states_5_cast_fp16)[name = string("transpose_153")];
	tensor<fp16, [1, 1024, 1, 1]> var_980_cast_fp16 = expand_dims(axes = var_980_axes_0, x = var_978_cast_fp16)[name = string("op_980_cast_fp16")];
	string var_996_pad_type_0 = const()[name = string("op_996_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_996_strides_0 = const()[name = string("op_996_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_996_pad_0 = const()[name = string("op_996_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_996_dilations_0 = const()[name = string("op_996_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_996_groups_0 = const()[name = string("op_996_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> var_996 = conv(dilations = var_996_dilations_0, groups = var_996_groups_0, pad = var_996_pad_0, pad_type = var_996_pad_type_0, strides = var_996_strides_0, weight = layers_2_self_attn_q_proj_weight, x = var_980_cast_fp16)[name = string("op_996")];
	tensor<int32, [4]> var_1001 = const()[name = string("op_1001"), val = tensor<int32, [4]>([1, 16, 64, 1])];
	tensor<fp16, [1, 16, 64, 1]> var_1002 = reshape(shape = var_1001, x = var_996)[name = string("op_1002")];
	tensor<int32, [4]> var_1007 = const()[name = string("op_1007"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	string var_1024_pad_type_0 = const()[name = string("op_1024_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1024_strides_0 = const()[name = string("op_1024_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1024_pad_0 = const()[name = string("op_1024_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1024_dilations_0 = const()[name = string("op_1024_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1024_groups_0 = const()[name = string("op_1024_groups_0"), val = int32(1)];
	tensor<fp16, [1, 512, 1, 1]> var_1024 = conv(dilations = var_1024_dilations_0, groups = var_1024_groups_0, pad = var_1024_pad_0, pad_type = var_1024_pad_type_0, strides = var_1024_strides_0, weight = layers_2_self_attn_k_proj_weight, x = var_980_cast_fp16)[name = string("op_1024")];
	tensor<int32, [4]> var_1029 = const()[name = string("op_1029"), val = tensor<int32, [4]>([1, 8, 64, 1])];
	tensor<fp16, [1, 8, 64, 1]> var_1030 = reshape(shape = var_1029, x = var_1024)[name = string("op_1030")];
	tensor<int32, [4]> var_1035 = const()[name = string("op_1035"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	string var_1052_pad_type_0 = const()[name = string("op_1052_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1052_strides_0 = const()[name = string("op_1052_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1052_pad_0 = const()[name = string("op_1052_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1052_dilations_0 = const()[name = string("op_1052_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1052_groups_0 = const()[name = string("op_1052_groups_0"), val = int32(1)];
	tensor<fp16, [1, 512, 1, 1]> var_1052 = conv(dilations = var_1052_dilations_0, groups = var_1052_groups_0, pad = var_1052_pad_0, pad_type = var_1052_pad_type_0, strides = var_1052_strides_0, weight = layers_2_self_attn_v_proj_weight, x = var_980_cast_fp16)[name = string("op_1052")];
	tensor<int32, [4]> var_1057 = const()[name = string("op_1057"), val = tensor<int32, [4]>([1, 8, 64, 1])];
	tensor<fp16, [1, 8, 64, 1]> var_1058 = reshape(shape = var_1057, x = var_1052)[name = string("op_1058")];
	tensor<int32, [4]> var_1063 = const()[name = string("op_1063"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	fp16 const_5_promoted = const()[name = string("const_5_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 16, 1, 64]> var_1008 = transpose(perm = var_1007, x = var_1002)[name = string("transpose_152")];
	tensor<fp16, [1, 16, 1, 64]> var_1070 = mul(x = var_1008, y = const_5_promoted)[name = string("op_1070")];
	int32 var_1072 = const()[name = string("op_1072"), val = int32(-1)];
	bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 16, 1, 128]> input_33 = concat(axis = var_1072, interleave = input_33_interleave_0, values = (var_1008, var_1070))[name = string("input_33")];
	tensor<int32, [1]> normed_15_axes_0 = const()[name = string("normed_15_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1078_to_fp16 = const()[name = string("op_1078_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 16, 1, 128]> normed_15_cast_fp16 = layer_norm(axes = normed_15_axes_0, epsilon = var_1078_to_fp16, x = input_33)[name = string("normed_15_cast_fp16")];
	tensor<int32, [2]> var_1081_split_sizes_0 = const()[name = string("op_1081_split_sizes_0"), val = tensor<int32, [2]>([64, 64])];
	int32 var_1081_axis_0 = const()[name = string("op_1081_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 64]> var_1081_0, tensor<fp16, [1, 16, 1, 64]> var_1081_1 = split(axis = var_1081_axis_0, split_sizes = var_1081_split_sizes_0, x = normed_15_cast_fp16)[name = string("op_1081")];
	tensor<fp16, [1, 16, 1, 64]> q_1 = mul(x = var_1081_0, y = layers_2_self_attn_q_layernorm_weight)[name = string("q_1")];
	fp16 const_6_promoted = const()[name = string("const_6_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 64]> var_1036 = transpose(perm = var_1035, x = var_1030)[name = string("transpose_151")];
	tensor<fp16, [1, 8, 1, 64]> var_1084 = mul(x = var_1036, y = const_6_promoted)[name = string("op_1084")];
	int32 var_1086 = const()[name = string("op_1086"), val = int32(-1)];
	bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 128]> input_35 = concat(axis = var_1086, interleave = input_35_interleave_0, values = (var_1036, var_1084))[name = string("input_35")];
	tensor<int32, [1]> normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1092_to_fp16 = const()[name = string("op_1092_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 8, 1, 128]> normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_1092_to_fp16, x = input_35)[name = string("normed_17_cast_fp16")];
	tensor<int32, [2]> var_1095_split_sizes_0 = const()[name = string("op_1095_split_sizes_0"), val = tensor<int32, [2]>([64, 64])];
	int32 var_1095_axis_0 = const()[name = string("op_1095_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 64]> var_1095_0, tensor<fp16, [1, 8, 1, 64]> var_1095_1 = split(axis = var_1095_axis_0, split_sizes = var_1095_split_sizes_0, x = normed_17_cast_fp16)[name = string("op_1095")];
	tensor<fp16, [1, 8, 1, 64]> k_1 = mul(x = var_1095_0, y = layers_2_self_attn_k_layernorm_weight)[name = string("k_1")];
	tensor<fp16, [1, 16, 1, 64]> var_1098 = mul(x = q_1, y = cos)[name = string("op_1098")];
	tensor<int32, [2]> var_1099_split_sizes_0 = const()[name = string("op_1099_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
	int32 var_1099_axis_0 = const()[name = string("op_1099_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 32]> var_1099_0, tensor<fp16, [1, 16, 1, 32]> var_1099_1 = split(axis = var_1099_axis_0, split_sizes = var_1099_split_sizes_0, x = q_1)[name = string("op_1099")];
	fp16 const_7_promoted = const()[name = string("const_7_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 16, 1, 32]> var_1101 = mul(x = var_1099_1, y = const_7_promoted)[name = string("op_1101")];
	int32 var_1103 = const()[name = string("op_1103"), val = int32(-1)];
	bool var_1104_interleave_0 = const()[name = string("op_1104_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 16, 1, 64]> var_1104 = concat(axis = var_1103, interleave = var_1104_interleave_0, values = (var_1101, var_1099_0))[name = string("op_1104")];
	tensor<fp16, [1, 16, 1, 64]> var_1105 = mul(x = var_1104, y = sin)[name = string("op_1105")];
	tensor<fp16, [1, 16, 1, 64]> q_3 = add(x = var_1098, y = var_1105)[name = string("q_3")];
	tensor<fp16, [1, 8, 1, 64]> var_1108 = mul(x = k_1, y = cos)[name = string("op_1108")];
	tensor<int32, [2]> var_1109_split_sizes_0 = const()[name = string("op_1109_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
	int32 var_1109_axis_0 = const()[name = string("op_1109_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 32]> var_1109_0, tensor<fp16, [1, 8, 1, 32]> var_1109_1 = split(axis = var_1109_axis_0, split_sizes = var_1109_split_sizes_0, x = k_1)[name = string("op_1109")];
	fp16 const_8_promoted = const()[name = string("const_8_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 32]> var_1111 = mul(x = var_1109_1, y = const_8_promoted)[name = string("op_1111")];
	int32 var_1113 = const()[name = string("op_1113"), val = int32(-1)];
	bool var_1114_interleave_0 = const()[name = string("op_1114_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 64]> var_1114 = concat(axis = var_1113, interleave = var_1114_interleave_0, values = (var_1111, var_1109_0))[name = string("op_1114")];
	tensor<fp16, [1, 8, 1, 64]> var_1115 = mul(x = var_1114, y = sin)[name = string("op_1115")];
	tensor<fp16, [1, 8, 1, 64]> k_3 = add(x = var_1108, y = var_1115)[name = string("k_3")];
	tensor<fp16, [12, 8, 2048, 64]> read_state_0 = read_state(input = kv_cache_0)[name = string("read_state_0")];
	tensor<int32, [4]> var_1120_begin_0 = const()[name = string("op_1120_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> var_1120_end_0 = const()[name = string("op_1120_end_0"), val = tensor<int32, [4]>([1, 8, 2048, 64])];
	tensor<bool, [4]> var_1120_end_mask_0 = const()[name = string("op_1120_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_1120_squeeze_mask_0 = const()[name = string("op_1120_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [8, 2048, 64]> var_1120_cast_fp16 = slice_by_index(begin = var_1120_begin_0, end = var_1120_end_0, end_mask = var_1120_end_mask_0, squeeze_mask = var_1120_squeeze_mask_0, x = read_state_0)[name = string("op_1120_cast_fp16")];
	tensor<int32, [1]> K_cache_1_axes_0 = const()[name = string("K_cache_1_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 8, 2048, 64]> K_cache_1_cast_fp16 = expand_dims(axes = K_cache_1_axes_0, x = var_1120_cast_fp16)[name = string("K_cache_1_cast_fp16")];
	tensor<int32, [4]> var_1125_begin_0 = const()[name = string("op_1125_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
	tensor<int32, [4]> var_1125_end_0 = const()[name = string("op_1125_end_0"), val = tensor<int32, [4]>([7, 8, 2048, 64])];
	tensor<bool, [4]> var_1125_end_mask_0 = const()[name = string("op_1125_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_1125_squeeze_mask_0 = const()[name = string("op_1125_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [8, 2048, 64]> var_1125_cast_fp16 = slice_by_index(begin = var_1125_begin_0, end = var_1125_end_0, end_mask = var_1125_end_mask_0, squeeze_mask = var_1125_squeeze_mask_0, x = read_state_0)[name = string("op_1125_cast_fp16")];
	tensor<int32, [1]> V_cache_1_axes_0 = const()[name = string("V_cache_1_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 8, 2048, 64]> V_cache_1_cast_fp16 = expand_dims(axes = V_cache_1_axes_0, x = var_1125_cast_fp16)[name = string("V_cache_1_cast_fp16")];
	tensor<int32, [4]> k_b_1_reps_0 = const()[name = string("k_b_1_reps_0"), val = tensor<int32, [4]>([1, 1, 2048, 1])];
	tensor<fp16, [1, 8, 2048, 64]> k_b_1 = tile(reps = k_b_1_reps_0, x = k_3)[name = string("k_b_1")];
	tensor<int32, [4]> v_b_1_reps_0 = const()[name = string("v_b_1_reps_0"), val = tensor<int32, [4]>([1, 1, 2048, 1])];
	tensor<fp16, [1, 8, 1, 64]> var_1064 = transpose(perm = var_1063, x = var_1058)[name = string("transpose_150")];
	tensor<fp16, [1, 8, 2048, 64]> v_b_1 = tile(reps = v_b_1_reps_0, x = var_1064)[name = string("v_b_1")];
	fp16 var_1130_promoted_to_fp16 = const()[name = string("op_1130_promoted_to_fp16"), val = fp16(0x1p+0)];
	tensor<fp16, [1, 1, 2048, 1]> var_1132_cast_fp16 = sub(x = var_1130_promoted_to_fp16, y = update_mask)[name = string("op_1132_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_1133_cast_fp16 = mul(x = K_cache_1_cast_fp16, y = var_1132_cast_fp16)[name = string("op_1133_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_1134_cast_fp16 = mul(x = k_b_1, y = update_mask)[name = string("op_1134_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> K_new_1_cast_fp16 = add(x = var_1133_cast_fp16, y = var_1134_cast_fp16)[name = string("K_new_1_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_1140_cast_fp16 = mul(x = V_cache_1_cast_fp16, y = var_1132_cast_fp16)[name = string("op_1140_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_1141_cast_fp16 = mul(x = v_b_1, y = update_mask)[name = string("op_1141_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> V_new_1_cast_fp16 = add(x = var_1140_cast_fp16, y = var_1141_cast_fp16)[name = string("V_new_1_cast_fp16")];
	tensor<int32, [1]> var_1145_axes_0 = const()[name = string("op_1145_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [8, 2048, 64]> var_1145_cast_fp16 = squeeze(axes = var_1145_axes_0, x = K_new_1_cast_fp16)[name = string("op_1145_cast_fp16")];
	tensor<int32, [4]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [12, 8, 2048, 64]> kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_1_stride_0, update = var_1145_cast_fp16, x = read_state_0)[name = string("kv_cache_0_internal_tensor_assign_1_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_1_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_12_write_state")];
	tensor<fp16, [12, 8, 2048, 64]> coreml_update_state_12 = read_state(input = kv_cache_0)[name = string("coreml_update_state_12")];
	tensor<int32, [1]> var_1152_axes_0 = const()[name = string("op_1152_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [8, 2048, 64]> var_1152_cast_fp16 = squeeze(axes = var_1152_axes_0, x = V_new_1_cast_fp16)[name = string("op_1152_cast_fp16")];
	tensor<int32, [4]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [4]>([6, 0, 0, 0])];
	tensor<int32, [4]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [12, 8, 2048, 64]> kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_2, begin_mask = kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_3, end_mask = kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_2_stride_0, update = var_1152_cast_fp16, x = coreml_update_state_12)[name = string("kv_cache_0_internal_tensor_assign_2_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_2_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_13_write_state")];
	tensor<fp16, [12, 8, 2048, 64]> coreml_update_state_13 = read_state(input = kv_cache_0)[name = string("coreml_update_state_13")];
	tensor<int32, [4]> transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_0_reps_0 = const()[name = string("tile_0_reps_0"), val = tensor<int32, [4]>([2, 1, 1, 1])];
	tensor<fp16, [8, 1, 2048, 64]> transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = K_new_1_cast_fp16)[name = string("transpose_149")];
	tensor<fp16, [16, 1, 2048, 64]> tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = string("tile_0_cast_fp16")];
	tensor<int32, [5]> concat_4 = const()[name = string("concat_4"), val = tensor<int32, [5]>([2, 8, 1, 2048, 64])];
	tensor<fp16, [2, 8, 1, 2048, 64]> reshape_0_cast_fp16 = reshape(shape = concat_4, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")];
	tensor<int32, [5]> transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_5 = const()[name = string("concat_5"), val = tensor<int32, [4]>([-1, 1, 2048, 64])];
	tensor<fp16, [8, 2, 1, 2048, 64]> transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_148")];
	tensor<fp16, [16, 1, 2048, 64]> reshape_1_cast_fp16 = reshape(shape = concat_5, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")];
	tensor<int32, [4]> transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor<int32, [4]>([1, 0, 3, 2])];
	tensor<int32, [4]> transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor<int32, [4]>([2, 1, 1, 1])];
	tensor<fp16, [8, 1, 2048, 64]> transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = V_new_1_cast_fp16)[name = string("transpose_146")];
	tensor<fp16, [16, 1, 2048, 64]> tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")];
	tensor<int32, [5]> concat_6 = const()[name = string("concat_6"), val = tensor<int32, [5]>([2, 8, 1, 2048, 64])];
	tensor<fp16, [2, 8, 1, 2048, 64]> reshape_2_cast_fp16 = reshape(shape = concat_6, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")];
	tensor<int32, [5]> transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_7 = const()[name = string("concat_7"), val = tensor<int32, [4]>([-1, 1, 2048, 64])];
	tensor<fp16, [8, 2, 1, 2048, 64]> transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_145")];
	tensor<fp16, [16, 1, 2048, 64]> reshape_3_cast_fp16 = reshape(shape = concat_7, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")];
	tensor<int32, [4]> V_e_1_perm_0 = const()[name = string("V_e_1_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	bool var_1179_transpose_x_0 = const()[name = string("op_1179_transpose_x_0"), val = bool(false)];
	bool var_1179_transpose_y_0 = const()[name = string("op_1179_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 16, 64, 2048]> transpose_24_cast_fp16 = transpose(perm = transpose_24_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_147")];
	tensor<fp16, [1, 16, 1, 2048]> var_1179_cast_fp16 = matmul(transpose_x = var_1179_transpose_x_0, transpose_y = var_1179_transpose_y_0, x = q_3, y = transpose_24_cast_fp16)[name = string("op_1179_cast_fp16")];
	fp16 var_1180_to_fp16 = const()[name = string("op_1180_to_fp16"), val = fp16(0x1p-3)];
	tensor<fp16, [1, 16, 1, 2048]> attn_1_cast_fp16 = mul(x = var_1179_cast_fp16, y = var_1180_to_fp16)[name = string("attn_1_cast_fp16")];
	tensor<fp16, [1, 16, 1, 2048]> attn_3_cast_fp16 = add(x = attn_1_cast_fp16, y = causal_mask)[name = string("attn_3_cast_fp16")];
	int32 var_1189 = const()[name = string("op_1189"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 2048]> var_1191_cast_fp16 = softmax(axis = var_1189, x = attn_3_cast_fp16)[name = string("op_1191_cast_fp16")];
	bool var_1207_transpose_x_0 = const()[name = string("op_1207_transpose_x_0"), val = bool(false)];
	bool var_1207_transpose_y_0 = const()[name = string("op_1207_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 16, 2048, 64]> V_e_1_cast_fp16 = transpose(perm = V_e_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_144")];
	tensor<fp16, [1, 16, 1, 64]> var_1207_cast_fp16 = matmul(transpose_x = var_1207_transpose_x_0, transpose_y = var_1207_transpose_y_0, x = var_1191_cast_fp16, y = V_e_1_cast_fp16)[name = string("op_1207_cast_fp16")];
	tensor<int32, [4]> var_1217 = const()[name = string("op_1217"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_1224 = const()[name = string("op_1224"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 16, 64]> var_1218 = transpose(perm = var_1217, x = var_1207_cast_fp16)[name = string("transpose_143")];
	tensor<fp16, [1, 1, 1024]> out_3 = reshape(shape = var_1224, x = var_1218)[name = string("out_3")];
	tensor<int32, [3]> var_1229 = const()[name = string("op_1229"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1024, 1024, 1]> squeeze_0 = const()[name = string("squeeze_0"), val = tensor<fp16, [1024, 1024, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814774144)))];
	string var_1245_pad_type_0 = const()[name = string("op_1245_pad_type_0"), val = string("valid")];
	int32 var_1245_groups_0 = const()[name = string("op_1245_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_1245_strides_0 = const()[name = string("op_1245_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_1245_pad_0 = const()[name = string("op_1245_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_1245_dilations_0 = const()[name = string("op_1245_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 1024, 1]> var_1230 = transpose(perm = var_1229, x = out_3)[name = string("transpose_142")];
	tensor<fp16, [1, 1024, 1]> var_1245 = conv(dilations = var_1245_dilations_0, groups = var_1245_groups_0, pad = var_1245_pad_0, pad_type = var_1245_pad_type_0, strides = var_1245_strides_0, weight = squeeze_0, x = var_1230)[name = string("op_1245")];
	tensor<int32, [3]> var_1249 = const()[name = string("op_1249"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> op_out_5 = transpose(perm = var_1249, x = var_1245)[name = string("transpose_141")];
	tensor<fp16, [1, 1, 1024]> x_15_cast_fp16 = add(x = x_9_cast_fp16, y = op_out_5)[name = string("x_15_cast_fp16")];
	fp16 const_9_promoted_to_fp16 = const()[name = string("const_9_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_1253_cast_fp16 = mul(x = x_15_cast_fp16, y = const_9_promoted_to_fp16)[name = string("op_1253_cast_fp16")];
	int32 var_1255 = const()[name = string("op_1255"), val = int32(-1)];
	bool input_39_interleave_0 = const()[name = string("input_39_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_39_cast_fp16 = concat(axis = var_1255, interleave = input_39_interleave_0, values = (x_15_cast_fp16, var_1253_cast_fp16))[name = string("input_39_cast_fp16")];
	tensor<int32, [1]> normed_19_axes_0 = const()[name = string("normed_19_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1261_to_fp16 = const()[name = string("op_1261_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_19_cast_fp16 = layer_norm(axes = normed_19_axes_0, epsilon = var_1261_to_fp16, x = input_39_cast_fp16)[name = string("normed_19_cast_fp16")];
	tensor<int32, [2]> var_1264_split_sizes_0 = const()[name = string("op_1264_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_1264_axis_0 = const()[name = string("op_1264_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_1264_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_1264_cast_fp16_1 = split(axis = var_1264_axis_0, split_sizes = var_1264_split_sizes_0, x = normed_19_cast_fp16)[name = string("op_1264_cast_fp16")];
	tensor<fp16, [1024]> layers_2_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_2_ffn_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816871360)))];
	tensor<fp16, [1, 1, 1024]> normed_21_cast_fp16 = mul(x = var_1264_cast_fp16_0, y = layers_2_ffn_norm_weight_promoted_to_fp16)[name = string("normed_21_cast_fp16")];
	tensor<int32, [3]> var_1270 = const()[name = string("op_1270"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_1273_axes_0 = const()[name = string("op_1273_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_1271_cast_fp16 = transpose(perm = var_1270, x = normed_21_cast_fp16)[name = string("transpose_140")];
	tensor<fp16, [1, 1024, 1, 1]> var_1273_cast_fp16 = expand_dims(axes = var_1273_axes_0, x = var_1271_cast_fp16)[name = string("op_1273_cast_fp16")];
	string input_43_pad_type_0 = const()[name = string("input_43_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> input_43_strides_0 = const()[name = string("input_43_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> input_43_pad_0 = const()[name = string("input_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> input_43_dilations_0 = const()[name = string("input_43_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 input_43_groups_0 = const()[name = string("input_43_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> input_43 = conv(dilations = input_43_dilations_0, groups = input_43_groups_0, pad = input_43_pad_0, pad_type = input_43_pad_type_0, strides = input_43_strides_0, weight = layers_2_feed_forward_w1_weight, x = var_1273_cast_fp16)[name = string("input_43")];
	string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> b_5 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = layers_2_feed_forward_w3_weight, x = var_1273_cast_fp16)[name = string("b_5")];
	tensor<fp16, [1, 4608, 1, 1]> var_1301 = silu(x = input_43)[name = string("op_1301")];
	tensor<fp16, [1, 4608, 1, 1]> input_45 = mul(x = var_1301, y = b_5)[name = string("input_45")];
	string mlp_9_pad_type_0 = const()[name = string("mlp_9_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_9_strides_0 = const()[name = string("mlp_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_9_pad_0 = const()[name = string("mlp_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_9_dilations_0 = const()[name = string("mlp_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_9_groups_0 = const()[name = string("mlp_9_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> mlp_9 = conv(dilations = mlp_9_dilations_0, groups = mlp_9_groups_0, pad = mlp_9_pad_0, pad_type = mlp_9_pad_type_0, strides = mlp_9_strides_0, weight = layers_2_feed_forward_w2_weight, x = input_45)[name = string("mlp_9")];
	tensor<int32, [1]> var_1315_axes_0 = const()[name = string("op_1315_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_1315 = squeeze(axes = var_1315_axes_0, x = mlp_9)[name = string("op_1315")];
	tensor<int32, [3]> var_1319 = const()[name = string("op_1319"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> mlp_11 = transpose(perm = var_1319, x = var_1315)[name = string("transpose_139")];
	tensor<fp16, [1, 1, 1024]> x_17_cast_fp16 = add(x = x_15_cast_fp16, y = mlp_11)[name = string("x_17_cast_fp16")];
	fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_1323_cast_fp16 = mul(x = x_17_cast_fp16, y = const_10_promoted_to_fp16)[name = string("op_1323_cast_fp16")];
	int32 var_1325 = const()[name = string("op_1325"), val = int32(-1)];
	bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_47_cast_fp16 = concat(axis = var_1325, interleave = input_47_interleave_0, values = (x_17_cast_fp16, var_1323_cast_fp16))[name = string("input_47_cast_fp16")];
	tensor<int32, [1]> normed_23_axes_0 = const()[name = string("normed_23_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1331_to_fp16 = const()[name = string("op_1331_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_23_cast_fp16 = layer_norm(axes = normed_23_axes_0, epsilon = var_1331_to_fp16, x = input_47_cast_fp16)[name = string("normed_23_cast_fp16")];
	tensor<int32, [2]> var_1334_split_sizes_0 = const()[name = string("op_1334_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_1334_axis_0 = const()[name = string("op_1334_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_1334_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_1334_cast_fp16_1 = split(axis = var_1334_axis_0, split_sizes = var_1334_split_sizes_0, x = normed_23_cast_fp16)[name = string("op_1334_cast_fp16")];
	tensor<fp16, [1024]> layers_3_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_3_operator_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816873472)))];
	tensor<fp16, [1, 1, 1024]> hidden_states_7_cast_fp16 = mul(x = var_1334_cast_fp16_0, y = layers_3_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_7_cast_fp16")];
	tensor<int32, [3]> var_1340 = const()[name = string("op_1340"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_1343_axes_0 = const()[name = string("op_1343_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_1341_cast_fp16 = transpose(perm = var_1340, x = hidden_states_7_cast_fp16)[name = string("transpose_138")];
	tensor<fp16, [1, 1024, 1, 1]> var_1343_cast_fp16 = expand_dims(axes = var_1343_axes_0, x = var_1341_cast_fp16)[name = string("op_1343_cast_fp16")];
	string BCx_5_pad_type_0 = const()[name = string("BCx_5_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> BCx_5_strides_0 = const()[name = string("BCx_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> BCx_5_pad_0 = const()[name = string("BCx_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> BCx_5_dilations_0 = const()[name = string("BCx_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 BCx_5_groups_0 = const()[name = string("BCx_5_groups_0"), val = int32(1)];
	tensor<fp16, [1, 3072, 1, 1]> BCx_5 = conv(dilations = BCx_5_dilations_0, groups = BCx_5_groups_0, pad = BCx_5_pad_0, pad_type = BCx_5_pad_type_0, strides = BCx_5_strides_0, weight = layers_3_conv_in_proj_weight, x = var_1343_cast_fp16)[name = string("BCx_5")];
	tensor<int32, [3]> var_1360_split_sizes_0 = const()[name = string("op_1360_split_sizes_0"), val = tensor<int32, [3]>([1024, 1024, 1024])];
	int32 var_1360_axis_0 = const()[name = string("op_1360_axis_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> var_1360_0, tensor<fp16, [1, 1024, 1, 1]> var_1360_1, tensor<fp16, [1, 1024, 1, 1]> var_1360_2 = split(axis = var_1360_axis_0, split_sizes = var_1360_split_sizes_0, x = BCx_5)[name = string("op_1360")];
	tensor<fp16, [1, 1024, 1, 1]> Bx_5 = mul(x = var_1360_0, y = var_1360_2)[name = string("Bx_5")];
	tensor<int32, [3]> var_1366_begin_0 = const()[name = string("op_1366_begin_0"), val = tensor<int32, [3]>([2, 0, 0])];
	tensor<int32, [3]> var_1366_end_0 = const()[name = string("op_1366_end_0"), val = tensor<int32, [3]>([3, 1024, 3])];
	tensor<bool, [3]> var_1366_end_mask_0 = const()[name = string("op_1366_end_mask_0"), val = tensor<bool, [3]>([false, true, true])];
	tensor<bool, [3]> var_1366_squeeze_mask_0 = const()[name = string("op_1366_squeeze_mask_0"), val = tensor<bool, [3]>([true, false, false])];
	tensor<fp16, [1024, 3]> var_1366_cast_fp16 = slice_by_index(begin = var_1366_begin_0, end = var_1366_end_0, end_mask = var_1366_end_mask_0, squeeze_mask = var_1366_squeeze_mask_0, x = conv_state_in)[name = string("op_1366_cast_fp16")];
	tensor<int32, [1]> var_1368_axes_0 = const()[name = string("op_1368_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1024, 3]> var_1368_cast_fp16 = expand_dims(axes = var_1368_axes_0, x = var_1366_cast_fp16)[name = string("op_1368_cast_fp16")];
	tensor<int32, [1]> slot_5_axes_0 = const()[name = string("slot_5_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1, 3]> slot_5_cast_fp16 = expand_dims(axes = slot_5_axes_0, x = var_1368_cast_fp16)[name = string("slot_5_cast_fp16")];
	tensor<int32, [4]> live_tail_5_begin_0 = const()[name = string("live_tail_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1])];
	tensor<int32, [4]> live_tail_5_end_0 = const()[name = string("live_tail_5_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
	tensor<bool, [4]> live_tail_5_end_mask_0 = const()[name = string("live_tail_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
	tensor<fp16, [1, 1024, 1, 2]> live_tail_5_cast_fp16 = slice_by_index(begin = live_tail_5_begin_0, end = live_tail_5_end_0, end_mask = live_tail_5_end_mask_0, x = slot_5_cast_fp16)[name = string("live_tail_5_cast_fp16")];
	int32 var_1377 = const()[name = string("op_1377"), val = int32(-1)];
	bool new_state_5_interleave_0 = const()[name = string("new_state_5_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1024, 1, 3]> new_state_5_cast_fp16 = concat(axis = var_1377, interleave = new_state_5_interleave_0, values = (live_tail_5_cast_fp16, Bx_5))[name = string("new_state_5_cast_fp16")];
	tensor<int32, [1]> var_1380_axes_0 = const()[name = string("op_1380_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1024, 1, 3]> var_1380_cast_fp16 = squeeze(axes = var_1380_axes_0, x = new_state_5_cast_fp16)[name = string("op_1380_cast_fp16")];
	tensor<int32, [1]> var_1382_axes_0 = const()[name = string("op_1382_axes_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1024, 3]> var_1382_cast_fp16 = squeeze(axes = var_1382_axes_0, x = var_1380_cast_fp16)[name = string("op_1382_cast_fp16")];
	string conv_out_5_pad_type_0 = const()[name = string("conv_out_5_pad_type_0"), val = string("valid")];
	int32 conv_out_5_groups_0 = const()[name = string("conv_out_5_groups_0"), val = int32(1024)];
	tensor<int32, [2]> conv_out_5_strides_0 = const()[name = string("conv_out_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> conv_out_5_pad_0 = const()[name = string("conv_out_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> conv_out_5_dilations_0 = const()[name = string("conv_out_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<fp16, [1024, 1, 1, 3]> layers_3_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_3_conv_conv_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816875584)))];
	tensor<fp16, [1, 1024, 1, 1]> conv_out_5_cast_fp16 = conv(dilations = conv_out_5_dilations_0, groups = conv_out_5_groups_0, pad = conv_out_5_pad_0, pad_type = conv_out_5_pad_type_0, strides = conv_out_5_strides_0, weight = layers_3_conv_conv_weight_promoted_to_fp16, x = new_state_5_cast_fp16)[name = string("conv_out_5_cast_fp16")];
	tensor<fp16, [1, 1024, 1, 1]> input_51_cast_fp16 = mul(x = var_1360_1, y = conv_out_5_cast_fp16)[name = string("input_51_cast_fp16")];
	string y_5_pad_type_0 = const()[name = string("y_5_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> y_5_strides_0 = const()[name = string("y_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> y_5_pad_0 = const()[name = string("y_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> y_5_dilations_0 = const()[name = string("y_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 y_5_groups_0 = const()[name = string("y_5_groups_0"), val = int32(1)];
	tensor<fp16, [1024, 1024, 1, 1]> layers_3_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_3_conv_out_proj_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816881792)))];
	tensor<fp16, [1, 1024, 1, 1]> y_5_cast_fp16 = conv(dilations = y_5_dilations_0, groups = y_5_groups_0, pad = y_5_pad_0, pad_type = y_5_pad_type_0, strides = y_5_strides_0, weight = layers_3_conv_out_proj_weight_promoted_to_fp16, x = input_51_cast_fp16)[name = string("y_5_cast_fp16")];
	tensor<int32, [1]> var_1408_axes_0 = const()[name = string("op_1408_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_1408_cast_fp16 = squeeze(axes = var_1408_axes_0, x = y_5_cast_fp16)[name = string("op_1408_cast_fp16")];
	tensor<int32, [3]> var_1412 = const()[name = string("op_1412"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> op_out_7_cast_fp16 = transpose(perm = var_1412, x = var_1408_cast_fp16)[name = string("transpose_137")];
	tensor<fp16, [1, 1, 1024]> x_19_cast_fp16 = add(x = x_17_cast_fp16, y = op_out_7_cast_fp16)[name = string("x_19_cast_fp16")];
	fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_1416_cast_fp16 = mul(x = x_19_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_1416_cast_fp16")];
	int32 var_1418 = const()[name = string("op_1418"), val = int32(-1)];
	bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_53_cast_fp16 = concat(axis = var_1418, interleave = input_53_interleave_0, values = (x_19_cast_fp16, var_1416_cast_fp16))[name = string("input_53_cast_fp16")];
	tensor<int32, [1]> normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1424_to_fp16 = const()[name = string("op_1424_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_1424_to_fp16, x = input_53_cast_fp16)[name = string("normed_25_cast_fp16")];
	tensor<int32, [2]> var_1427_split_sizes_0 = const()[name = string("op_1427_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_1427_axis_0 = const()[name = string("op_1427_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_1427_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_1427_cast_fp16_1 = split(axis = var_1427_axis_0, split_sizes = var_1427_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_1427_cast_fp16")];
	tensor<fp16, [1024]> layers_3_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_3_ffn_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818979008)))];
	tensor<fp16, [1, 1, 1024]> normed_27_cast_fp16 = mul(x = var_1427_cast_fp16_0, y = layers_3_ffn_norm_weight_promoted_to_fp16)[name = string("normed_27_cast_fp16")];
	tensor<int32, [3]> var_1433 = const()[name = string("op_1433"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_1436_axes_0 = const()[name = string("op_1436_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_1434_cast_fp16 = transpose(perm = var_1433, x = normed_27_cast_fp16)[name = string("transpose_136")];
	tensor<fp16, [1, 1024, 1, 1]> var_1436_cast_fp16 = expand_dims(axes = var_1436_axes_0, x = var_1434_cast_fp16)[name = string("op_1436_cast_fp16")];
	string input_57_pad_type_0 = const()[name = string("input_57_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> input_57_strides_0 = const()[name = string("input_57_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> input_57_pad_0 = const()[name = string("input_57_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> input_57_dilations_0 = const()[name = string("input_57_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 input_57_groups_0 = const()[name = string("input_57_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> input_57 = conv(dilations = input_57_dilations_0, groups = input_57_groups_0, pad = input_57_pad_0, pad_type = input_57_pad_type_0, strides = input_57_strides_0, weight = layers_3_feed_forward_w1_weight, x = var_1436_cast_fp16)[name = string("input_57")];
	string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> b_7 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = layers_3_feed_forward_w3_weight, x = var_1436_cast_fp16)[name = string("b_7")];
	tensor<fp16, [1, 4608, 1, 1]> var_1464 = silu(x = input_57)[name = string("op_1464")];
	tensor<fp16, [1, 4608, 1, 1]> input_59 = mul(x = var_1464, y = b_7)[name = string("input_59")];
	string mlp_13_pad_type_0 = const()[name = string("mlp_13_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_13_strides_0 = const()[name = string("mlp_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_13_pad_0 = const()[name = string("mlp_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_13_dilations_0 = const()[name = string("mlp_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_13_groups_0 = const()[name = string("mlp_13_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> mlp_13 = conv(dilations = mlp_13_dilations_0, groups = mlp_13_groups_0, pad = mlp_13_pad_0, pad_type = mlp_13_pad_type_0, strides = mlp_13_strides_0, weight = layers_3_feed_forward_w2_weight, x = input_59)[name = string("mlp_13")];
	tensor<int32, [1]> var_1478_axes_0 = const()[name = string("op_1478_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_1478 = squeeze(axes = var_1478_axes_0, x = mlp_13)[name = string("op_1478")];
	tensor<int32, [3]> var_1482 = const()[name = string("op_1482"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> mlp_15 = transpose(perm = var_1482, x = var_1478)[name = string("transpose_135")];
	tensor<fp16, [1, 1, 1024]> x_21_cast_fp16 = add(x = x_19_cast_fp16, y = mlp_15)[name = string("x_21_cast_fp16")];
	fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_1486_cast_fp16 = mul(x = x_21_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_1486_cast_fp16")];
	int32 var_1488 = const()[name = string("op_1488"), val = int32(-1)];
	bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_61_cast_fp16 = concat(axis = var_1488, interleave = input_61_interleave_0, values = (x_21_cast_fp16, var_1486_cast_fp16))[name = string("input_61_cast_fp16")];
	tensor<int32, [1]> normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1494_to_fp16 = const()[name = string("op_1494_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_1494_to_fp16, x = input_61_cast_fp16)[name = string("normed_29_cast_fp16")];
	tensor<int32, [2]> var_1497_split_sizes_0 = const()[name = string("op_1497_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_1497_axis_0 = const()[name = string("op_1497_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_1497_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_1497_cast_fp16_1 = split(axis = var_1497_axis_0, split_sizes = var_1497_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_1497_cast_fp16")];
	tensor<fp16, [1024]> layers_4_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_4_operator_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818981120)))];
	tensor<fp16, [1, 1, 1024]> hidden_states_9_cast_fp16 = mul(x = var_1497_cast_fp16_0, y = layers_4_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_9_cast_fp16")];
	tensor<int32, [3]> var_1503 = const()[name = string("op_1503"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_1506_axes_0 = const()[name = string("op_1506_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_1504_cast_fp16 = transpose(perm = var_1503, x = hidden_states_9_cast_fp16)[name = string("transpose_134")];
	tensor<fp16, [1, 1024, 1, 1]> var_1506_cast_fp16 = expand_dims(axes = var_1506_axes_0, x = var_1504_cast_fp16)[name = string("op_1506_cast_fp16")];
	string BCx_7_pad_type_0 = const()[name = string("BCx_7_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> BCx_7_strides_0 = const()[name = string("BCx_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> BCx_7_pad_0 = const()[name = string("BCx_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> BCx_7_dilations_0 = const()[name = string("BCx_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 BCx_7_groups_0 = const()[name = string("BCx_7_groups_0"), val = int32(1)];
	tensor<fp16, [1, 3072, 1, 1]> BCx_7 = conv(dilations = BCx_7_dilations_0, groups = BCx_7_groups_0, pad = BCx_7_pad_0, pad_type = BCx_7_pad_type_0, strides = BCx_7_strides_0, weight = layers_4_conv_in_proj_weight, x = var_1506_cast_fp16)[name = string("BCx_7")];
	tensor<int32, [3]> var_1523_split_sizes_0 = const()[name = string("op_1523_split_sizes_0"), val = tensor<int32, [3]>([1024, 1024, 1024])];
	int32 var_1523_axis_0 = const()[name = string("op_1523_axis_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> var_1523_0, tensor<fp16, [1, 1024, 1, 1]> var_1523_1, tensor<fp16, [1, 1024, 1, 1]> var_1523_2 = split(axis = var_1523_axis_0, split_sizes = var_1523_split_sizes_0, x = BCx_7)[name = string("op_1523")];
	tensor<fp16, [1, 1024, 1, 1]> Bx_7 = mul(x = var_1523_0, y = var_1523_2)[name = string("Bx_7")];
	tensor<int32, [3]> var_1529_begin_0 = const()[name = string("op_1529_begin_0"), val = tensor<int32, [3]>([3, 0, 0])];
	tensor<int32, [3]> var_1529_end_0 = const()[name = string("op_1529_end_0"), val = tensor<int32, [3]>([4, 1024, 3])];
	tensor<bool, [3]> var_1529_end_mask_0 = const()[name = string("op_1529_end_mask_0"), val = tensor<bool, [3]>([false, true, true])];
	tensor<bool, [3]> var_1529_squeeze_mask_0 = const()[name = string("op_1529_squeeze_mask_0"), val = tensor<bool, [3]>([true, false, false])];
	tensor<fp16, [1024, 3]> var_1529_cast_fp16 = slice_by_index(begin = var_1529_begin_0, end = var_1529_end_0, end_mask = var_1529_end_mask_0, squeeze_mask = var_1529_squeeze_mask_0, x = conv_state_in)[name = string("op_1529_cast_fp16")];
	tensor<int32, [1]> var_1531_axes_0 = const()[name = string("op_1531_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1024, 3]> var_1531_cast_fp16 = expand_dims(axes = var_1531_axes_0, x = var_1529_cast_fp16)[name = string("op_1531_cast_fp16")];
	tensor<int32, [1]> slot_7_axes_0 = const()[name = string("slot_7_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1, 3]> slot_7_cast_fp16 = expand_dims(axes = slot_7_axes_0, x = var_1531_cast_fp16)[name = string("slot_7_cast_fp16")];
	tensor<int32, [4]> live_tail_7_begin_0 = const()[name = string("live_tail_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1])];
	tensor<int32, [4]> live_tail_7_end_0 = const()[name = string("live_tail_7_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
	tensor<bool, [4]> live_tail_7_end_mask_0 = const()[name = string("live_tail_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
	tensor<fp16, [1, 1024, 1, 2]> live_tail_7_cast_fp16 = slice_by_index(begin = live_tail_7_begin_0, end = live_tail_7_end_0, end_mask = live_tail_7_end_mask_0, x = slot_7_cast_fp16)[name = string("live_tail_7_cast_fp16")];
	int32 var_1540 = const()[name = string("op_1540"), val = int32(-1)];
	bool new_state_7_interleave_0 = const()[name = string("new_state_7_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1024, 1, 3]> new_state_7_cast_fp16 = concat(axis = var_1540, interleave = new_state_7_interleave_0, values = (live_tail_7_cast_fp16, Bx_7))[name = string("new_state_7_cast_fp16")];
	tensor<int32, [1]> var_1543_axes_0 = const()[name = string("op_1543_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1024, 1, 3]> var_1543_cast_fp16 = squeeze(axes = var_1543_axes_0, x = new_state_7_cast_fp16)[name = string("op_1543_cast_fp16")];
	tensor<int32, [1]> var_1545_axes_0 = const()[name = string("op_1545_axes_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1024, 3]> var_1545_cast_fp16 = squeeze(axes = var_1545_axes_0, x = var_1543_cast_fp16)[name = string("op_1545_cast_fp16")];
	string conv_out_7_pad_type_0 = const()[name = string("conv_out_7_pad_type_0"), val = string("valid")];
	int32 conv_out_7_groups_0 = const()[name = string("conv_out_7_groups_0"), val = int32(1024)];
	tensor<int32, [2]> conv_out_7_strides_0 = const()[name = string("conv_out_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> conv_out_7_pad_0 = const()[name = string("conv_out_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> conv_out_7_dilations_0 = const()[name = string("conv_out_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<fp16, [1024, 1, 1, 3]> layers_4_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_4_conv_conv_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818983232)))];
	tensor<fp16, [1, 1024, 1, 1]> conv_out_7_cast_fp16 = conv(dilations = conv_out_7_dilations_0, groups = conv_out_7_groups_0, pad = conv_out_7_pad_0, pad_type = conv_out_7_pad_type_0, strides = conv_out_7_strides_0, weight = layers_4_conv_conv_weight_promoted_to_fp16, x = new_state_7_cast_fp16)[name = string("conv_out_7_cast_fp16")];
	tensor<fp16, [1, 1024, 1, 1]> input_65_cast_fp16 = mul(x = var_1523_1, y = conv_out_7_cast_fp16)[name = string("input_65_cast_fp16")];
	string y_7_pad_type_0 = const()[name = string("y_7_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> y_7_strides_0 = const()[name = string("y_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> y_7_pad_0 = const()[name = string("y_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> y_7_dilations_0 = const()[name = string("y_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 y_7_groups_0 = const()[name = string("y_7_groups_0"), val = int32(1)];
	tensor<fp16, [1024, 1024, 1, 1]> layers_4_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_4_conv_out_proj_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818989440)))];
	tensor<fp16, [1, 1024, 1, 1]> y_7_cast_fp16 = conv(dilations = y_7_dilations_0, groups = y_7_groups_0, pad = y_7_pad_0, pad_type = y_7_pad_type_0, strides = y_7_strides_0, weight = layers_4_conv_out_proj_weight_promoted_to_fp16, x = input_65_cast_fp16)[name = string("y_7_cast_fp16")];
	tensor<int32, [1]> var_1571_axes_0 = const()[name = string("op_1571_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_1571_cast_fp16 = squeeze(axes = var_1571_axes_0, x = y_7_cast_fp16)[name = string("op_1571_cast_fp16")];
	tensor<int32, [3]> var_1575 = const()[name = string("op_1575"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> op_out_9_cast_fp16 = transpose(perm = var_1575, x = var_1571_cast_fp16)[name = string("transpose_133")];
	tensor<fp16, [1, 1, 1024]> x_23_cast_fp16 = add(x = x_21_cast_fp16, y = op_out_9_cast_fp16)[name = string("x_23_cast_fp16")];
	fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_1579_cast_fp16 = mul(x = x_23_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_1579_cast_fp16")];
	int32 var_1581 = const()[name = string("op_1581"), val = int32(-1)];
	bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_67_cast_fp16 = concat(axis = var_1581, interleave = input_67_interleave_0, values = (x_23_cast_fp16, var_1579_cast_fp16))[name = string("input_67_cast_fp16")];
	tensor<int32, [1]> normed_31_axes_0 = const()[name = string("normed_31_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1587_to_fp16 = const()[name = string("op_1587_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_31_cast_fp16 = layer_norm(axes = normed_31_axes_0, epsilon = var_1587_to_fp16, x = input_67_cast_fp16)[name = string("normed_31_cast_fp16")];
	tensor<int32, [2]> var_1590_split_sizes_0 = const()[name = string("op_1590_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_1590_axis_0 = const()[name = string("op_1590_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_1590_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_1590_cast_fp16_1 = split(axis = var_1590_axis_0, split_sizes = var_1590_split_sizes_0, x = normed_31_cast_fp16)[name = string("op_1590_cast_fp16")];
	tensor<fp16, [1024]> layers_4_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_4_ffn_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(821086656)))];
	tensor<fp16, [1, 1, 1024]> normed_33_cast_fp16 = mul(x = var_1590_cast_fp16_0, y = layers_4_ffn_norm_weight_promoted_to_fp16)[name = string("normed_33_cast_fp16")];
	tensor<int32, [3]> var_1596 = const()[name = string("op_1596"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_1599_axes_0 = const()[name = string("op_1599_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_1597_cast_fp16 = transpose(perm = var_1596, x = normed_33_cast_fp16)[name = string("transpose_132")];
	tensor<fp16, [1, 1024, 1, 1]> var_1599_cast_fp16 = expand_dims(axes = var_1599_axes_0, x = var_1597_cast_fp16)[name = string("op_1599_cast_fp16")];
	string input_71_pad_type_0 = const()[name = string("input_71_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> input_71_strides_0 = const()[name = string("input_71_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> input_71_pad_0 = const()[name = string("input_71_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> input_71_dilations_0 = const()[name = string("input_71_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 input_71_groups_0 = const()[name = string("input_71_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> input_71 = conv(dilations = input_71_dilations_0, groups = input_71_groups_0, pad = input_71_pad_0, pad_type = input_71_pad_type_0, strides = input_71_strides_0, weight = layers_4_feed_forward_w1_weight, x = var_1599_cast_fp16)[name = string("input_71")];
	string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> b_9 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = layers_4_feed_forward_w3_weight, x = var_1599_cast_fp16)[name = string("b_9")];
	tensor<fp16, [1, 4608, 1, 1]> var_1627 = silu(x = input_71)[name = string("op_1627")];
	tensor<fp16, [1, 4608, 1, 1]> input_73 = mul(x = var_1627, y = b_9)[name = string("input_73")];
	string mlp_17_pad_type_0 = const()[name = string("mlp_17_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_17_strides_0 = const()[name = string("mlp_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_17_pad_0 = const()[name = string("mlp_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_17_dilations_0 = const()[name = string("mlp_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_17_groups_0 = const()[name = string("mlp_17_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> mlp_17 = conv(dilations = mlp_17_dilations_0, groups = mlp_17_groups_0, pad = mlp_17_pad_0, pad_type = mlp_17_pad_type_0, strides = mlp_17_strides_0, weight = layers_4_feed_forward_w2_weight, x = input_73)[name = string("mlp_17")];
	tensor<int32, [1]> var_1641_axes_0 = const()[name = string("op_1641_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_1641 = squeeze(axes = var_1641_axes_0, x = mlp_17)[name = string("op_1641")];
	tensor<int32, [3]> var_1645 = const()[name = string("op_1645"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> mlp_19 = transpose(perm = var_1645, x = var_1641)[name = string("transpose_131")];
	tensor<fp16, [1, 1, 1024]> x_25_cast_fp16 = add(x = x_23_cast_fp16, y = mlp_19)[name = string("x_25_cast_fp16")];
	fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_1649_cast_fp16 = mul(x = x_25_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1649_cast_fp16")];
	int32 var_1651 = const()[name = string("op_1651"), val = int32(-1)];
	bool input_75_interleave_0 = const()[name = string("input_75_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_75_cast_fp16 = concat(axis = var_1651, interleave = input_75_interleave_0, values = (x_25_cast_fp16, var_1649_cast_fp16))[name = string("input_75_cast_fp16")];
	tensor<int32, [1]> normed_35_axes_0 = const()[name = string("normed_35_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1657_to_fp16 = const()[name = string("op_1657_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_35_cast_fp16 = layer_norm(axes = normed_35_axes_0, epsilon = var_1657_to_fp16, x = input_75_cast_fp16)[name = string("normed_35_cast_fp16")];
	tensor<int32, [2]> var_1660_split_sizes_0 = const()[name = string("op_1660_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_1660_axis_0 = const()[name = string("op_1660_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_1660_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_1660_cast_fp16_1 = split(axis = var_1660_axis_0, split_sizes = var_1660_split_sizes_0, x = normed_35_cast_fp16)[name = string("op_1660_cast_fp16")];
	tensor<fp16, [1024]> layers_5_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_5_operator_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(821088768)))];
	tensor<fp16, [1, 1, 1024]> hidden_states_11_cast_fp16 = mul(x = var_1660_cast_fp16_0, y = layers_5_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_11_cast_fp16")];
	tensor<int32, [3]> var_1666 = const()[name = string("op_1666"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_1669_axes_0 = const()[name = string("op_1669_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_1667_cast_fp16 = transpose(perm = var_1666, x = hidden_states_11_cast_fp16)[name = string("transpose_130")];
	tensor<fp16, [1, 1024, 1, 1]> var_1669_cast_fp16 = expand_dims(axes = var_1669_axes_0, x = var_1667_cast_fp16)[name = string("op_1669_cast_fp16")];
	string var_1685_pad_type_0 = const()[name = string("op_1685_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1685_strides_0 = const()[name = string("op_1685_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1685_pad_0 = const()[name = string("op_1685_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1685_dilations_0 = const()[name = string("op_1685_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1685_groups_0 = const()[name = string("op_1685_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> var_1685 = conv(dilations = var_1685_dilations_0, groups = var_1685_groups_0, pad = var_1685_pad_0, pad_type = var_1685_pad_type_0, strides = var_1685_strides_0, weight = layers_5_self_attn_q_proj_weight, x = var_1669_cast_fp16)[name = string("op_1685")];
	tensor<int32, [4]> var_1690 = const()[name = string("op_1690"), val = tensor<int32, [4]>([1, 16, 64, 1])];
	tensor<fp16, [1, 16, 64, 1]> var_1691 = reshape(shape = var_1690, x = var_1685)[name = string("op_1691")];
	tensor<int32, [4]> var_1696 = const()[name = string("op_1696"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	string var_1713_pad_type_0 = const()[name = string("op_1713_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1713_strides_0 = const()[name = string("op_1713_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1713_pad_0 = const()[name = string("op_1713_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1713_dilations_0 = const()[name = string("op_1713_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1713_groups_0 = const()[name = string("op_1713_groups_0"), val = int32(1)];
	tensor<fp16, [1, 512, 1, 1]> var_1713 = conv(dilations = var_1713_dilations_0, groups = var_1713_groups_0, pad = var_1713_pad_0, pad_type = var_1713_pad_type_0, strides = var_1713_strides_0, weight = layers_5_self_attn_k_proj_weight, x = var_1669_cast_fp16)[name = string("op_1713")];
	tensor<int32, [4]> var_1718 = const()[name = string("op_1718"), val = tensor<int32, [4]>([1, 8, 64, 1])];
	tensor<fp16, [1, 8, 64, 1]> var_1719 = reshape(shape = var_1718, x = var_1713)[name = string("op_1719")];
	tensor<int32, [4]> var_1724 = const()[name = string("op_1724"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	string var_1741_pad_type_0 = const()[name = string("op_1741_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1741_strides_0 = const()[name = string("op_1741_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1741_pad_0 = const()[name = string("op_1741_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1741_dilations_0 = const()[name = string("op_1741_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1741_groups_0 = const()[name = string("op_1741_groups_0"), val = int32(1)];
	tensor<fp16, [1, 512, 1, 1]> var_1741 = conv(dilations = var_1741_dilations_0, groups = var_1741_groups_0, pad = var_1741_pad_0, pad_type = var_1741_pad_type_0, strides = var_1741_strides_0, weight = layers_5_self_attn_v_proj_weight, x = var_1669_cast_fp16)[name = string("op_1741")];
	tensor<int32, [4]> var_1746 = const()[name = string("op_1746"), val = tensor<int32, [4]>([1, 8, 64, 1])];
	tensor<fp16, [1, 8, 64, 1]> var_1747 = reshape(shape = var_1746, x = var_1741)[name = string("op_1747")];
	tensor<int32, [4]> var_1752 = const()[name = string("op_1752"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	fp16 const_15_promoted = const()[name = string("const_15_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 16, 1, 64]> var_1697 = transpose(perm = var_1696, x = var_1691)[name = string("transpose_129")];
	tensor<fp16, [1, 16, 1, 64]> var_1759 = mul(x = var_1697, y = const_15_promoted)[name = string("op_1759")];
	int32 var_1761 = const()[name = string("op_1761"), val = int32(-1)];
	bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 16, 1, 128]> input_79 = concat(axis = var_1761, interleave = input_79_interleave_0, values = (var_1697, var_1759))[name = string("input_79")];
	tensor<int32, [1]> normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1767_to_fp16 = const()[name = string("op_1767_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 16, 1, 128]> normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_1767_to_fp16, x = input_79)[name = string("normed_37_cast_fp16")];
	tensor<int32, [2]> var_1770_split_sizes_0 = const()[name = string("op_1770_split_sizes_0"), val = tensor<int32, [2]>([64, 64])];
	int32 var_1770_axis_0 = const()[name = string("op_1770_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 64]> var_1770_0, tensor<fp16, [1, 16, 1, 64]> var_1770_1 = split(axis = var_1770_axis_0, split_sizes = var_1770_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_1770")];
	tensor<fp16, [1, 16, 1, 64]> q_5 = mul(x = var_1770_0, y = layers_5_self_attn_q_layernorm_weight)[name = string("q_5")];
	fp16 const_16_promoted = const()[name = string("const_16_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 64]> var_1725 = transpose(perm = var_1724, x = var_1719)[name = string("transpose_128")];
	tensor<fp16, [1, 8, 1, 64]> var_1773 = mul(x = var_1725, y = const_16_promoted)[name = string("op_1773")];
	int32 var_1775 = const()[name = string("op_1775"), val = int32(-1)];
	bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 128]> input_81 = concat(axis = var_1775, interleave = input_81_interleave_0, values = (var_1725, var_1773))[name = string("input_81")];
	tensor<int32, [1]> normed_39_axes_0 = const()[name = string("normed_39_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1781_to_fp16 = const()[name = string("op_1781_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 8, 1, 128]> normed_39_cast_fp16 = layer_norm(axes = normed_39_axes_0, epsilon = var_1781_to_fp16, x = input_81)[name = string("normed_39_cast_fp16")];
	tensor<int32, [2]> var_1784_split_sizes_0 = const()[name = string("op_1784_split_sizes_0"), val = tensor<int32, [2]>([64, 64])];
	int32 var_1784_axis_0 = const()[name = string("op_1784_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 64]> var_1784_0, tensor<fp16, [1, 8, 1, 64]> var_1784_1 = split(axis = var_1784_axis_0, split_sizes = var_1784_split_sizes_0, x = normed_39_cast_fp16)[name = string("op_1784")];
	tensor<fp16, [1, 8, 1, 64]> k_5 = mul(x = var_1784_0, y = layers_5_self_attn_k_layernorm_weight)[name = string("k_5")];
	tensor<fp16, [1, 16, 1, 64]> var_1787 = mul(x = q_5, y = cos)[name = string("op_1787")];
	tensor<int32, [2]> var_1788_split_sizes_0 = const()[name = string("op_1788_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
	int32 var_1788_axis_0 = const()[name = string("op_1788_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 32]> var_1788_0, tensor<fp16, [1, 16, 1, 32]> var_1788_1 = split(axis = var_1788_axis_0, split_sizes = var_1788_split_sizes_0, x = q_5)[name = string("op_1788")];
	fp16 const_17_promoted = const()[name = string("const_17_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 16, 1, 32]> var_1790 = mul(x = var_1788_1, y = const_17_promoted)[name = string("op_1790")];
	int32 var_1792 = const()[name = string("op_1792"), val = int32(-1)];
	bool var_1793_interleave_0 = const()[name = string("op_1793_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 16, 1, 64]> var_1793 = concat(axis = var_1792, interleave = var_1793_interleave_0, values = (var_1790, var_1788_0))[name = string("op_1793")];
	tensor<fp16, [1, 16, 1, 64]> var_1794 = mul(x = var_1793, y = sin)[name = string("op_1794")];
	tensor<fp16, [1, 16, 1, 64]> q_7 = add(x = var_1787, y = var_1794)[name = string("q_7")];
	tensor<fp16, [1, 8, 1, 64]> var_1797 = mul(x = k_5, y = cos)[name = string("op_1797")];
	tensor<int32, [2]> var_1798_split_sizes_0 = const()[name = string("op_1798_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
	int32 var_1798_axis_0 = const()[name = string("op_1798_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 32]> var_1798_0, tensor<fp16, [1, 8, 1, 32]> var_1798_1 = split(axis = var_1798_axis_0, split_sizes = var_1798_split_sizes_0, x = k_5)[name = string("op_1798")];
	fp16 const_18_promoted = const()[name = string("const_18_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 32]> var_1800 = mul(x = var_1798_1, y = const_18_promoted)[name = string("op_1800")];
	int32 var_1802 = const()[name = string("op_1802"), val = int32(-1)];
	bool var_1803_interleave_0 = const()[name = string("op_1803_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 64]> var_1803 = concat(axis = var_1802, interleave = var_1803_interleave_0, values = (var_1800, var_1798_0))[name = string("op_1803")];
	tensor<fp16, [1, 8, 1, 64]> var_1804 = mul(x = var_1803, y = sin)[name = string("op_1804")];
	tensor<fp16, [1, 8, 1, 64]> k_7 = add(x = var_1797, y = var_1804)[name = string("k_7")];
	tensor<int32, [4]> var_1809_begin_0 = const()[name = string("op_1809_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
	tensor<int32, [4]> var_1809_end_0 = const()[name = string("op_1809_end_0"), val = tensor<int32, [4]>([2, 8, 2048, 64])];
	tensor<bool, [4]> var_1809_end_mask_0 = const()[name = string("op_1809_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_1809_squeeze_mask_0 = const()[name = string("op_1809_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [8, 2048, 64]> var_1809_cast_fp16 = slice_by_index(begin = var_1809_begin_0, end = var_1809_end_0, end_mask = var_1809_end_mask_0, squeeze_mask = var_1809_squeeze_mask_0, x = coreml_update_state_13)[name = string("op_1809_cast_fp16")];
	tensor<int32, [1]> K_cache_3_axes_0 = const()[name = string("K_cache_3_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 8, 2048, 64]> K_cache_3_cast_fp16 = expand_dims(axes = K_cache_3_axes_0, x = var_1809_cast_fp16)[name = string("K_cache_3_cast_fp16")];
	tensor<int32, [4]> var_1814_begin_0 = const()[name = string("op_1814_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
	tensor<int32, [4]> var_1814_end_0 = const()[name = string("op_1814_end_0"), val = tensor<int32, [4]>([8, 8, 2048, 64])];
	tensor<bool, [4]> var_1814_end_mask_0 = const()[name = string("op_1814_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_1814_squeeze_mask_0 = const()[name = string("op_1814_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [8, 2048, 64]> var_1814_cast_fp16 = slice_by_index(begin = var_1814_begin_0, end = var_1814_end_0, end_mask = var_1814_end_mask_0, squeeze_mask = var_1814_squeeze_mask_0, x = coreml_update_state_13)[name = string("op_1814_cast_fp16")];
	tensor<int32, [1]> V_cache_3_axes_0 = const()[name = string("V_cache_3_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 8, 2048, 64]> V_cache_3_cast_fp16 = expand_dims(axes = V_cache_3_axes_0, x = var_1814_cast_fp16)[name = string("V_cache_3_cast_fp16")];
	tensor<int32, [4]> k_b_3_reps_0 = const()[name = string("k_b_3_reps_0"), val = tensor<int32, [4]>([1, 1, 2048, 1])];
	tensor<fp16, [1, 8, 2048, 64]> k_b_3 = tile(reps = k_b_3_reps_0, x = k_7)[name = string("k_b_3")];
	tensor<int32, [4]> v_b_3_reps_0 = const()[name = string("v_b_3_reps_0"), val = tensor<int32, [4]>([1, 1, 2048, 1])];
	tensor<fp16, [1, 8, 1, 64]> var_1753 = transpose(perm = var_1752, x = var_1747)[name = string("transpose_127")];
	tensor<fp16, [1, 8, 2048, 64]> v_b_3 = tile(reps = v_b_3_reps_0, x = var_1753)[name = string("v_b_3")];
	tensor<fp16, [1, 8, 2048, 64]> var_1822_cast_fp16 = mul(x = K_cache_3_cast_fp16, y = var_1132_cast_fp16)[name = string("op_1822_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_1823_cast_fp16 = mul(x = k_b_3, y = update_mask)[name = string("op_1823_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> K_new_3_cast_fp16 = add(x = var_1822_cast_fp16, y = var_1823_cast_fp16)[name = string("K_new_3_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_1829_cast_fp16 = mul(x = V_cache_3_cast_fp16, y = var_1132_cast_fp16)[name = string("op_1829_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_1830_cast_fp16 = mul(x = v_b_3, y = update_mask)[name = string("op_1830_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> V_new_3_cast_fp16 = add(x = var_1829_cast_fp16, y = var_1830_cast_fp16)[name = string("V_new_3_cast_fp16")];
	tensor<int32, [1]> var_1834_axes_0 = const()[name = string("op_1834_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [8, 2048, 64]> var_1834_cast_fp16 = squeeze(axes = var_1834_axes_0, x = K_new_3_cast_fp16)[name = string("op_1834_cast_fp16")];
	tensor<int32, [4]> concat_8 = const()[name = string("concat_8"), val = tensor<int32, [4]>([1, 0, 0, 0])];
	tensor<int32, [4]> concat_9 = const()[name = string("concat_9"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [12, 8, 2048, 64]> kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_8, begin_mask = kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_9, end_mask = kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_3_stride_0, update = var_1834_cast_fp16, x = coreml_update_state_13)[name = string("kv_cache_0_internal_tensor_assign_3_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_3_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_14_write_state")];
	tensor<fp16, [12, 8, 2048, 64]> coreml_update_state_14 = read_state(input = kv_cache_0)[name = string("coreml_update_state_14")];
	tensor<int32, [1]> var_1841_axes_0 = const()[name = string("op_1841_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [8, 2048, 64]> var_1841_cast_fp16 = squeeze(axes = var_1841_axes_0, x = V_new_3_cast_fp16)[name = string("op_1841_cast_fp16")];
	tensor<int32, [4]> concat_10 = const()[name = string("concat_10"), val = tensor<int32, [4]>([7, 0, 0, 0])];
	tensor<int32, [4]> concat_11 = const()[name = string("concat_11"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [12, 8, 2048, 64]> kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_10, begin_mask = kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_11, end_mask = kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_4_stride_0, update = var_1841_cast_fp16, x = coreml_update_state_14)[name = string("kv_cache_0_internal_tensor_assign_4_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_4_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_15_write_state")];
	tensor<fp16, [12, 8, 2048, 64]> coreml_update_state_15 = read_state(input = kv_cache_0)[name = string("coreml_update_state_15")];
	tensor<int32, [4]> transpose_4_perm_0 = const()[name = string("transpose_4_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_2_reps_0 = const()[name = string("tile_2_reps_0"), val = tensor<int32, [4]>([2, 1, 1, 1])];
	tensor<fp16, [8, 1, 2048, 64]> transpose_4_cast_fp16 = transpose(perm = transpose_4_perm_0, x = K_new_3_cast_fp16)[name = string("transpose_126")];
	tensor<fp16, [16, 1, 2048, 64]> tile_2_cast_fp16 = tile(reps = tile_2_reps_0, x = transpose_4_cast_fp16)[name = string("tile_2_cast_fp16")];
	tensor<int32, [5]> concat_12 = const()[name = string("concat_12"), val = tensor<int32, [5]>([2, 8, 1, 2048, 64])];
	tensor<fp16, [2, 8, 1, 2048, 64]> reshape_4_cast_fp16 = reshape(shape = concat_12, x = tile_2_cast_fp16)[name = string("reshape_4_cast_fp16")];
	tensor<int32, [5]> transpose_5_perm_0 = const()[name = string("transpose_5_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_13 = const()[name = string("concat_13"), val = tensor<int32, [4]>([-1, 1, 2048, 64])];
	tensor<fp16, [8, 2, 1, 2048, 64]> transpose_5_cast_fp16 = transpose(perm = transpose_5_perm_0, x = reshape_4_cast_fp16)[name = string("transpose_125")];
	tensor<fp16, [16, 1, 2048, 64]> reshape_5_cast_fp16 = reshape(shape = concat_13, x = transpose_5_cast_fp16)[name = string("reshape_5_cast_fp16")];
	tensor<int32, [4]> transpose_25_perm_0 = const()[name = string("transpose_25_perm_0"), val = tensor<int32, [4]>([1, 0, 3, 2])];
	tensor<int32, [4]> transpose_6_perm_0 = const()[name = string("transpose_6_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_3_reps_0 = const()[name = string("tile_3_reps_0"), val = tensor<int32, [4]>([2, 1, 1, 1])];
	tensor<fp16, [8, 1, 2048, 64]> transpose_6_cast_fp16 = transpose(perm = transpose_6_perm_0, x = V_new_3_cast_fp16)[name = string("transpose_123")];
	tensor<fp16, [16, 1, 2048, 64]> tile_3_cast_fp16 = tile(reps = tile_3_reps_0, x = transpose_6_cast_fp16)[name = string("tile_3_cast_fp16")];
	tensor<int32, [5]> concat_14 = const()[name = string("concat_14"), val = tensor<int32, [5]>([2, 8, 1, 2048, 64])];
	tensor<fp16, [2, 8, 1, 2048, 64]> reshape_6_cast_fp16 = reshape(shape = concat_14, x = tile_3_cast_fp16)[name = string("reshape_6_cast_fp16")];
	tensor<int32, [5]> transpose_7_perm_0 = const()[name = string("transpose_7_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_15 = const()[name = string("concat_15"), val = tensor<int32, [4]>([-1, 1, 2048, 64])];
	tensor<fp16, [8, 2, 1, 2048, 64]> transpose_7_cast_fp16 = transpose(perm = transpose_7_perm_0, x = reshape_6_cast_fp16)[name = string("transpose_122")];
	tensor<fp16, [16, 1, 2048, 64]> reshape_7_cast_fp16 = reshape(shape = concat_15, x = transpose_7_cast_fp16)[name = string("reshape_7_cast_fp16")];
	tensor<int32, [4]> V_e_3_perm_0 = const()[name = string("V_e_3_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	bool var_1868_transpose_x_0 = const()[name = string("op_1868_transpose_x_0"), val = bool(false)];
	bool var_1868_transpose_y_0 = const()[name = string("op_1868_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 16, 64, 2048]> transpose_25_cast_fp16 = transpose(perm = transpose_25_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_124")];
	tensor<fp16, [1, 16, 1, 2048]> var_1868_cast_fp16 = matmul(transpose_x = var_1868_transpose_x_0, transpose_y = var_1868_transpose_y_0, x = q_7, y = transpose_25_cast_fp16)[name = string("op_1868_cast_fp16")];
	fp16 var_1869_to_fp16 = const()[name = string("op_1869_to_fp16"), val = fp16(0x1p-3)];
	tensor<fp16, [1, 16, 1, 2048]> attn_7_cast_fp16 = mul(x = var_1868_cast_fp16, y = var_1869_to_fp16)[name = string("attn_7_cast_fp16")];
	tensor<fp16, [1, 16, 1, 2048]> attn_9_cast_fp16 = add(x = attn_7_cast_fp16, y = causal_mask)[name = string("attn_9_cast_fp16")];
	int32 var_1878 = const()[name = string("op_1878"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 2048]> var_1880_cast_fp16 = softmax(axis = var_1878, x = attn_9_cast_fp16)[name = string("op_1880_cast_fp16")];
	bool var_1896_transpose_x_0 = const()[name = string("op_1896_transpose_x_0"), val = bool(false)];
	bool var_1896_transpose_y_0 = const()[name = string("op_1896_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 16, 2048, 64]> V_e_3_cast_fp16 = transpose(perm = V_e_3_perm_0, x = reshape_7_cast_fp16)[name = string("transpose_121")];
	tensor<fp16, [1, 16, 1, 64]> var_1896_cast_fp16 = matmul(transpose_x = var_1896_transpose_x_0, transpose_y = var_1896_transpose_y_0, x = var_1880_cast_fp16, y = V_e_3_cast_fp16)[name = string("op_1896_cast_fp16")];
	tensor<int32, [4]> var_1906 = const()[name = string("op_1906"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_1913 = const()[name = string("op_1913"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 16, 64]> var_1907 = transpose(perm = var_1906, x = var_1896_cast_fp16)[name = string("transpose_120")];
	tensor<fp16, [1, 1, 1024]> out_7 = reshape(shape = var_1913, x = var_1907)[name = string("out_7")];
	tensor<int32, [3]> var_1918 = const()[name = string("op_1918"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1024, 1024, 1]> squeeze_1 = const()[name = string("squeeze_1"), val = tensor<fp16, [1024, 1024, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(821090880)))];
	string var_1934_pad_type_0 = const()[name = string("op_1934_pad_type_0"), val = string("valid")];
	int32 var_1934_groups_0 = const()[name = string("op_1934_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_1934_strides_0 = const()[name = string("op_1934_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_1934_pad_0 = const()[name = string("op_1934_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_1934_dilations_0 = const()[name = string("op_1934_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 1024, 1]> var_1919 = transpose(perm = var_1918, x = out_7)[name = string("transpose_119")];
	tensor<fp16, [1, 1024, 1]> var_1934 = conv(dilations = var_1934_dilations_0, groups = var_1934_groups_0, pad = var_1934_pad_0, pad_type = var_1934_pad_type_0, strides = var_1934_strides_0, weight = squeeze_1, x = var_1919)[name = string("op_1934")];
	tensor<int32, [3]> var_1938 = const()[name = string("op_1938"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> op_out_11 = transpose(perm = var_1938, x = var_1934)[name = string("transpose_118")];
	tensor<fp16, [1, 1, 1024]> x_31_cast_fp16 = add(x = x_25_cast_fp16, y = op_out_11)[name = string("x_31_cast_fp16")];
	fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_1942_cast_fp16 = mul(x = x_31_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1942_cast_fp16")];
	int32 var_1944 = const()[name = string("op_1944"), val = int32(-1)];
	bool input_85_interleave_0 = const()[name = string("input_85_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_85_cast_fp16 = concat(axis = var_1944, interleave = input_85_interleave_0, values = (x_31_cast_fp16, var_1942_cast_fp16))[name = string("input_85_cast_fp16")];
	tensor<int32, [1]> normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1950_to_fp16 = const()[name = string("op_1950_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_1950_to_fp16, x = input_85_cast_fp16)[name = string("normed_41_cast_fp16")];
	tensor<int32, [2]> var_1953_split_sizes_0 = const()[name = string("op_1953_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_1953_axis_0 = const()[name = string("op_1953_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_1953_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_1953_cast_fp16_1 = split(axis = var_1953_axis_0, split_sizes = var_1953_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_1953_cast_fp16")];
	tensor<fp16, [1024]> layers_5_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_5_ffn_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(823188096)))];
	tensor<fp16, [1, 1, 1024]> normed_43_cast_fp16 = mul(x = var_1953_cast_fp16_0, y = layers_5_ffn_norm_weight_promoted_to_fp16)[name = string("normed_43_cast_fp16")];
	tensor<int32, [3]> var_1959 = const()[name = string("op_1959"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_1962_axes_0 = const()[name = string("op_1962_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_1960_cast_fp16 = transpose(perm = var_1959, x = normed_43_cast_fp16)[name = string("transpose_117")];
	tensor<fp16, [1, 1024, 1, 1]> var_1962_cast_fp16 = expand_dims(axes = var_1962_axes_0, x = var_1960_cast_fp16)[name = string("op_1962_cast_fp16")];
	string input_89_pad_type_0 = const()[name = string("input_89_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> input_89_strides_0 = const()[name = string("input_89_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> input_89_pad_0 = const()[name = string("input_89_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> input_89_dilations_0 = const()[name = string("input_89_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 input_89_groups_0 = const()[name = string("input_89_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> input_89 = conv(dilations = input_89_dilations_0, groups = input_89_groups_0, pad = input_89_pad_0, pad_type = input_89_pad_type_0, strides = input_89_strides_0, weight = layers_5_feed_forward_w1_weight, x = var_1962_cast_fp16)[name = string("input_89")];
	string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> b_11 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = layers_5_feed_forward_w3_weight, x = var_1962_cast_fp16)[name = string("b_11")];
	tensor<fp16, [1, 4608, 1, 1]> var_1990 = silu(x = input_89)[name = string("op_1990")];
	tensor<fp16, [1, 4608, 1, 1]> input_91 = mul(x = var_1990, y = b_11)[name = string("input_91")];
	string mlp_21_pad_type_0 = const()[name = string("mlp_21_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_21_strides_0 = const()[name = string("mlp_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_21_pad_0 = const()[name = string("mlp_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_21_dilations_0 = const()[name = string("mlp_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_21_groups_0 = const()[name = string("mlp_21_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> mlp_21 = conv(dilations = mlp_21_dilations_0, groups = mlp_21_groups_0, pad = mlp_21_pad_0, pad_type = mlp_21_pad_type_0, strides = mlp_21_strides_0, weight = layers_5_feed_forward_w2_weight, x = input_91)[name = string("mlp_21")];
	tensor<int32, [1]> var_2004_axes_0 = const()[name = string("op_2004_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2004 = squeeze(axes = var_2004_axes_0, x = mlp_21)[name = string("op_2004")];
	tensor<int32, [3]> var_2008 = const()[name = string("op_2008"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> mlp_23 = transpose(perm = var_2008, x = var_2004)[name = string("transpose_116")];
	tensor<fp16, [1, 1, 1024]> x_33_cast_fp16 = add(x = x_31_cast_fp16, y = mlp_23)[name = string("x_33_cast_fp16")];
	fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_2012_cast_fp16 = mul(x = x_33_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_2012_cast_fp16")];
	int32 var_2014 = const()[name = string("op_2014"), val = int32(-1)];
	bool input_93_interleave_0 = const()[name = string("input_93_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_93_cast_fp16 = concat(axis = var_2014, interleave = input_93_interleave_0, values = (x_33_cast_fp16, var_2012_cast_fp16))[name = string("input_93_cast_fp16")];
	tensor<int32, [1]> normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2020_to_fp16 = const()[name = string("op_2020_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_2020_to_fp16, x = input_93_cast_fp16)[name = string("normed_45_cast_fp16")];
	tensor<int32, [2]> var_2023_split_sizes_0 = const()[name = string("op_2023_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_2023_axis_0 = const()[name = string("op_2023_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_2023_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_2023_cast_fp16_1 = split(axis = var_2023_axis_0, split_sizes = var_2023_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_2023_cast_fp16")];
	tensor<fp16, [1024]> layers_6_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_6_operator_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(823190208)))];
	tensor<fp16, [1, 1, 1024]> hidden_states_13_cast_fp16 = mul(x = var_2023_cast_fp16_0, y = layers_6_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_13_cast_fp16")];
	tensor<int32, [3]> var_2029 = const()[name = string("op_2029"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2032_axes_0 = const()[name = string("op_2032_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2030_cast_fp16 = transpose(perm = var_2029, x = hidden_states_13_cast_fp16)[name = string("transpose_115")];
	tensor<fp16, [1, 1024, 1, 1]> var_2032_cast_fp16 = expand_dims(axes = var_2032_axes_0, x = var_2030_cast_fp16)[name = string("op_2032_cast_fp16")];
	string BCx_9_pad_type_0 = const()[name = string("BCx_9_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> BCx_9_strides_0 = const()[name = string("BCx_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> BCx_9_pad_0 = const()[name = string("BCx_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> BCx_9_dilations_0 = const()[name = string("BCx_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 BCx_9_groups_0 = const()[name = string("BCx_9_groups_0"), val = int32(1)];
	tensor<fp16, [1, 3072, 1, 1]> BCx_9 = conv(dilations = BCx_9_dilations_0, groups = BCx_9_groups_0, pad = BCx_9_pad_0, pad_type = BCx_9_pad_type_0, strides = BCx_9_strides_0, weight = layers_6_conv_in_proj_weight, x = var_2032_cast_fp16)[name = string("BCx_9")];
	tensor<int32, [3]> var_2049_split_sizes_0 = const()[name = string("op_2049_split_sizes_0"), val = tensor<int32, [3]>([1024, 1024, 1024])];
	int32 var_2049_axis_0 = const()[name = string("op_2049_axis_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> var_2049_0, tensor<fp16, [1, 1024, 1, 1]> var_2049_1, tensor<fp16, [1, 1024, 1, 1]> var_2049_2 = split(axis = var_2049_axis_0, split_sizes = var_2049_split_sizes_0, x = BCx_9)[name = string("op_2049")];
	tensor<fp16, [1, 1024, 1, 1]> Bx_9 = mul(x = var_2049_0, y = var_2049_2)[name = string("Bx_9")];
	tensor<int32, [3]> var_2055_begin_0 = const()[name = string("op_2055_begin_0"), val = tensor<int32, [3]>([4, 0, 0])];
	tensor<int32, [3]> var_2055_end_0 = const()[name = string("op_2055_end_0"), val = tensor<int32, [3]>([5, 1024, 3])];
	tensor<bool, [3]> var_2055_end_mask_0 = const()[name = string("op_2055_end_mask_0"), val = tensor<bool, [3]>([false, true, true])];
	tensor<bool, [3]> var_2055_squeeze_mask_0 = const()[name = string("op_2055_squeeze_mask_0"), val = tensor<bool, [3]>([true, false, false])];
	tensor<fp16, [1024, 3]> var_2055_cast_fp16 = slice_by_index(begin = var_2055_begin_0, end = var_2055_end_0, end_mask = var_2055_end_mask_0, squeeze_mask = var_2055_squeeze_mask_0, x = conv_state_in)[name = string("op_2055_cast_fp16")];
	tensor<int32, [1]> var_2057_axes_0 = const()[name = string("op_2057_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1024, 3]> var_2057_cast_fp16 = expand_dims(axes = var_2057_axes_0, x = var_2055_cast_fp16)[name = string("op_2057_cast_fp16")];
	tensor<int32, [1]> slot_9_axes_0 = const()[name = string("slot_9_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1, 3]> slot_9_cast_fp16 = expand_dims(axes = slot_9_axes_0, x = var_2057_cast_fp16)[name = string("slot_9_cast_fp16")];
	tensor<int32, [4]> live_tail_9_begin_0 = const()[name = string("live_tail_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1])];
	tensor<int32, [4]> live_tail_9_end_0 = const()[name = string("live_tail_9_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
	tensor<bool, [4]> live_tail_9_end_mask_0 = const()[name = string("live_tail_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
	tensor<fp16, [1, 1024, 1, 2]> live_tail_9_cast_fp16 = slice_by_index(begin = live_tail_9_begin_0, end = live_tail_9_end_0, end_mask = live_tail_9_end_mask_0, x = slot_9_cast_fp16)[name = string("live_tail_9_cast_fp16")];
	int32 var_2066 = const()[name = string("op_2066"), val = int32(-1)];
	bool new_state_9_interleave_0 = const()[name = string("new_state_9_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1024, 1, 3]> new_state_9_cast_fp16 = concat(axis = var_2066, interleave = new_state_9_interleave_0, values = (live_tail_9_cast_fp16, Bx_9))[name = string("new_state_9_cast_fp16")];
	tensor<int32, [1]> var_2069_axes_0 = const()[name = string("op_2069_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1024, 1, 3]> var_2069_cast_fp16 = squeeze(axes = var_2069_axes_0, x = new_state_9_cast_fp16)[name = string("op_2069_cast_fp16")];
	tensor<int32, [1]> var_2071_axes_0 = const()[name = string("op_2071_axes_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1024, 3]> var_2071_cast_fp16 = squeeze(axes = var_2071_axes_0, x = var_2069_cast_fp16)[name = string("op_2071_cast_fp16")];
	string conv_out_9_pad_type_0 = const()[name = string("conv_out_9_pad_type_0"), val = string("valid")];
	int32 conv_out_9_groups_0 = const()[name = string("conv_out_9_groups_0"), val = int32(1024)];
	tensor<int32, [2]> conv_out_9_strides_0 = const()[name = string("conv_out_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> conv_out_9_pad_0 = const()[name = string("conv_out_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> conv_out_9_dilations_0 = const()[name = string("conv_out_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<fp16, [1024, 1, 1, 3]> layers_6_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_6_conv_conv_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(823192320)))];
	tensor<fp16, [1, 1024, 1, 1]> conv_out_9_cast_fp16 = conv(dilations = conv_out_9_dilations_0, groups = conv_out_9_groups_0, pad = conv_out_9_pad_0, pad_type = conv_out_9_pad_type_0, strides = conv_out_9_strides_0, weight = layers_6_conv_conv_weight_promoted_to_fp16, x = new_state_9_cast_fp16)[name = string("conv_out_9_cast_fp16")];
	tensor<fp16, [1, 1024, 1, 1]> input_97_cast_fp16 = mul(x = var_2049_1, y = conv_out_9_cast_fp16)[name = string("input_97_cast_fp16")];
	string y_9_pad_type_0 = const()[name = string("y_9_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> y_9_strides_0 = const()[name = string("y_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> y_9_pad_0 = const()[name = string("y_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> y_9_dilations_0 = const()[name = string("y_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 y_9_groups_0 = const()[name = string("y_9_groups_0"), val = int32(1)];
	tensor<fp16, [1024, 1024, 1, 1]> layers_6_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_6_conv_out_proj_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(823198528)))];
	tensor<fp16, [1, 1024, 1, 1]> y_9_cast_fp16 = conv(dilations = y_9_dilations_0, groups = y_9_groups_0, pad = y_9_pad_0, pad_type = y_9_pad_type_0, strides = y_9_strides_0, weight = layers_6_conv_out_proj_weight_promoted_to_fp16, x = input_97_cast_fp16)[name = string("y_9_cast_fp16")];
	tensor<int32, [1]> var_2097_axes_0 = const()[name = string("op_2097_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2097_cast_fp16 = squeeze(axes = var_2097_axes_0, x = y_9_cast_fp16)[name = string("op_2097_cast_fp16")];
	tensor<int32, [3]> var_2101 = const()[name = string("op_2101"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> op_out_13_cast_fp16 = transpose(perm = var_2101, x = var_2097_cast_fp16)[name = string("transpose_114")];
	tensor<fp16, [1, 1, 1024]> x_35_cast_fp16 = add(x = x_33_cast_fp16, y = op_out_13_cast_fp16)[name = string("x_35_cast_fp16")];
	fp16 const_21_promoted_to_fp16 = const()[name = string("const_21_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_2105_cast_fp16 = mul(x = x_35_cast_fp16, y = const_21_promoted_to_fp16)[name = string("op_2105_cast_fp16")];
	int32 var_2107 = const()[name = string("op_2107"), val = int32(-1)];
	bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_99_cast_fp16 = concat(axis = var_2107, interleave = input_99_interleave_0, values = (x_35_cast_fp16, var_2105_cast_fp16))[name = string("input_99_cast_fp16")];
	tensor<int32, [1]> normed_47_axes_0 = const()[name = string("normed_47_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2113_to_fp16 = const()[name = string("op_2113_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_47_cast_fp16 = layer_norm(axes = normed_47_axes_0, epsilon = var_2113_to_fp16, x = input_99_cast_fp16)[name = string("normed_47_cast_fp16")];
	tensor<int32, [2]> var_2116_split_sizes_0 = const()[name = string("op_2116_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_2116_axis_0 = const()[name = string("op_2116_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_2116_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_2116_cast_fp16_1 = split(axis = var_2116_axis_0, split_sizes = var_2116_split_sizes_0, x = normed_47_cast_fp16)[name = string("op_2116_cast_fp16")];
	tensor<fp16, [1024]> layers_6_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_6_ffn_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(825295744)))];
	tensor<fp16, [1, 1, 1024]> normed_49_cast_fp16 = mul(x = var_2116_cast_fp16_0, y = layers_6_ffn_norm_weight_promoted_to_fp16)[name = string("normed_49_cast_fp16")];
	tensor<int32, [3]> var_2122 = const()[name = string("op_2122"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2125_axes_0 = const()[name = string("op_2125_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2123_cast_fp16 = transpose(perm = var_2122, x = normed_49_cast_fp16)[name = string("transpose_113")];
	tensor<fp16, [1, 1024, 1, 1]> var_2125_cast_fp16 = expand_dims(axes = var_2125_axes_0, x = var_2123_cast_fp16)[name = string("op_2125_cast_fp16")];
	string input_103_pad_type_0 = const()[name = string("input_103_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> input_103_strides_0 = const()[name = string("input_103_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> input_103_pad_0 = const()[name = string("input_103_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> input_103_dilations_0 = const()[name = string("input_103_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 input_103_groups_0 = const()[name = string("input_103_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> input_103 = conv(dilations = input_103_dilations_0, groups = input_103_groups_0, pad = input_103_pad_0, pad_type = input_103_pad_type_0, strides = input_103_strides_0, weight = layers_6_feed_forward_w1_weight, x = var_2125_cast_fp16)[name = string("input_103")];
	string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> b_13 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = layers_6_feed_forward_w3_weight, x = var_2125_cast_fp16)[name = string("b_13")];
	tensor<fp16, [1, 4608, 1, 1]> var_2153 = silu(x = input_103)[name = string("op_2153")];
	tensor<fp16, [1, 4608, 1, 1]> input_105 = mul(x = var_2153, y = b_13)[name = string("input_105")];
	string mlp_25_pad_type_0 = const()[name = string("mlp_25_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_25_strides_0 = const()[name = string("mlp_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_25_pad_0 = const()[name = string("mlp_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_25_dilations_0 = const()[name = string("mlp_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_25_groups_0 = const()[name = string("mlp_25_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> mlp_25 = conv(dilations = mlp_25_dilations_0, groups = mlp_25_groups_0, pad = mlp_25_pad_0, pad_type = mlp_25_pad_type_0, strides = mlp_25_strides_0, weight = layers_6_feed_forward_w2_weight, x = input_105)[name = string("mlp_25")];
	tensor<int32, [1]> var_2167_axes_0 = const()[name = string("op_2167_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2167 = squeeze(axes = var_2167_axes_0, x = mlp_25)[name = string("op_2167")];
	tensor<int32, [3]> var_2171 = const()[name = string("op_2171"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> mlp_27 = transpose(perm = var_2171, x = var_2167)[name = string("transpose_112")];
	tensor<fp16, [1, 1, 1024]> x_37_cast_fp16 = add(x = x_35_cast_fp16, y = mlp_27)[name = string("x_37_cast_fp16")];
	fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_2175_cast_fp16 = mul(x = x_37_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_2175_cast_fp16")];
	int32 var_2177 = const()[name = string("op_2177"), val = int32(-1)];
	bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_107_cast_fp16 = concat(axis = var_2177, interleave = input_107_interleave_0, values = (x_37_cast_fp16, var_2175_cast_fp16))[name = string("input_107_cast_fp16")];
	tensor<int32, [1]> normed_51_axes_0 = const()[name = string("normed_51_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2183_to_fp16 = const()[name = string("op_2183_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_51_cast_fp16 = layer_norm(axes = normed_51_axes_0, epsilon = var_2183_to_fp16, x = input_107_cast_fp16)[name = string("normed_51_cast_fp16")];
	tensor<int32, [2]> var_2186_split_sizes_0 = const()[name = string("op_2186_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_2186_axis_0 = const()[name = string("op_2186_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_2186_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_2186_cast_fp16_1 = split(axis = var_2186_axis_0, split_sizes = var_2186_split_sizes_0, x = normed_51_cast_fp16)[name = string("op_2186_cast_fp16")];
	tensor<fp16, [1024]> layers_7_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_7_operator_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(825297856)))];
	tensor<fp16, [1, 1, 1024]> hidden_states_15_cast_fp16 = mul(x = var_2186_cast_fp16_0, y = layers_7_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_15_cast_fp16")];
	tensor<int32, [3]> var_2192 = const()[name = string("op_2192"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2195_axes_0 = const()[name = string("op_2195_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2193_cast_fp16 = transpose(perm = var_2192, x = hidden_states_15_cast_fp16)[name = string("transpose_111")];
	tensor<fp16, [1, 1024, 1, 1]> var_2195_cast_fp16 = expand_dims(axes = var_2195_axes_0, x = var_2193_cast_fp16)[name = string("op_2195_cast_fp16")];
	string BCx_11_pad_type_0 = const()[name = string("BCx_11_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> BCx_11_strides_0 = const()[name = string("BCx_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> BCx_11_pad_0 = const()[name = string("BCx_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> BCx_11_dilations_0 = const()[name = string("BCx_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 BCx_11_groups_0 = const()[name = string("BCx_11_groups_0"), val = int32(1)];
	tensor<fp16, [1, 3072, 1, 1]> BCx_11 = conv(dilations = BCx_11_dilations_0, groups = BCx_11_groups_0, pad = BCx_11_pad_0, pad_type = BCx_11_pad_type_0, strides = BCx_11_strides_0, weight = layers_7_conv_in_proj_weight, x = var_2195_cast_fp16)[name = string("BCx_11")];
	tensor<int32, [3]> var_2212_split_sizes_0 = const()[name = string("op_2212_split_sizes_0"), val = tensor<int32, [3]>([1024, 1024, 1024])];
	int32 var_2212_axis_0 = const()[name = string("op_2212_axis_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> var_2212_0, tensor<fp16, [1, 1024, 1, 1]> var_2212_1, tensor<fp16, [1, 1024, 1, 1]> var_2212_2 = split(axis = var_2212_axis_0, split_sizes = var_2212_split_sizes_0, x = BCx_11)[name = string("op_2212")];
	tensor<fp16, [1, 1024, 1, 1]> Bx_11 = mul(x = var_2212_0, y = var_2212_2)[name = string("Bx_11")];
	tensor<int32, [3]> var_2218_begin_0 = const()[name = string("op_2218_begin_0"), val = tensor<int32, [3]>([5, 0, 0])];
	tensor<int32, [3]> var_2218_end_0 = const()[name = string("op_2218_end_0"), val = tensor<int32, [3]>([6, 1024, 3])];
	tensor<bool, [3]> var_2218_end_mask_0 = const()[name = string("op_2218_end_mask_0"), val = tensor<bool, [3]>([false, true, true])];
	tensor<bool, [3]> var_2218_squeeze_mask_0 = const()[name = string("op_2218_squeeze_mask_0"), val = tensor<bool, [3]>([true, false, false])];
	tensor<fp16, [1024, 3]> var_2218_cast_fp16 = slice_by_index(begin = var_2218_begin_0, end = var_2218_end_0, end_mask = var_2218_end_mask_0, squeeze_mask = var_2218_squeeze_mask_0, x = conv_state_in)[name = string("op_2218_cast_fp16")];
	tensor<int32, [1]> var_2220_axes_0 = const()[name = string("op_2220_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1024, 3]> var_2220_cast_fp16 = expand_dims(axes = var_2220_axes_0, x = var_2218_cast_fp16)[name = string("op_2220_cast_fp16")];
	tensor<int32, [1]> slot_11_axes_0 = const()[name = string("slot_11_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1, 3]> slot_11_cast_fp16 = expand_dims(axes = slot_11_axes_0, x = var_2220_cast_fp16)[name = string("slot_11_cast_fp16")];
	tensor<int32, [4]> live_tail_11_begin_0 = const()[name = string("live_tail_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1])];
	tensor<int32, [4]> live_tail_11_end_0 = const()[name = string("live_tail_11_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
	tensor<bool, [4]> live_tail_11_end_mask_0 = const()[name = string("live_tail_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
	tensor<fp16, [1, 1024, 1, 2]> live_tail_11_cast_fp16 = slice_by_index(begin = live_tail_11_begin_0, end = live_tail_11_end_0, end_mask = live_tail_11_end_mask_0, x = slot_11_cast_fp16)[name = string("live_tail_11_cast_fp16")];
	int32 var_2229 = const()[name = string("op_2229"), val = int32(-1)];
	bool new_state_11_interleave_0 = const()[name = string("new_state_11_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1024, 1, 3]> new_state_11_cast_fp16 = concat(axis = var_2229, interleave = new_state_11_interleave_0, values = (live_tail_11_cast_fp16, Bx_11))[name = string("new_state_11_cast_fp16")];
	tensor<int32, [1]> var_2232_axes_0 = const()[name = string("op_2232_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1024, 1, 3]> var_2232_cast_fp16 = squeeze(axes = var_2232_axes_0, x = new_state_11_cast_fp16)[name = string("op_2232_cast_fp16")];
	tensor<int32, [1]> var_2234_axes_0 = const()[name = string("op_2234_axes_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1024, 3]> var_2234_cast_fp16 = squeeze(axes = var_2234_axes_0, x = var_2232_cast_fp16)[name = string("op_2234_cast_fp16")];
	string conv_out_11_pad_type_0 = const()[name = string("conv_out_11_pad_type_0"), val = string("valid")];
	int32 conv_out_11_groups_0 = const()[name = string("conv_out_11_groups_0"), val = int32(1024)];
	tensor<int32, [2]> conv_out_11_strides_0 = const()[name = string("conv_out_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> conv_out_11_pad_0 = const()[name = string("conv_out_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> conv_out_11_dilations_0 = const()[name = string("conv_out_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<fp16, [1024, 1, 1, 3]> layers_7_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_7_conv_conv_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(825299968)))];
	tensor<fp16, [1, 1024, 1, 1]> conv_out_11_cast_fp16 = conv(dilations = conv_out_11_dilations_0, groups = conv_out_11_groups_0, pad = conv_out_11_pad_0, pad_type = conv_out_11_pad_type_0, strides = conv_out_11_strides_0, weight = layers_7_conv_conv_weight_promoted_to_fp16, x = new_state_11_cast_fp16)[name = string("conv_out_11_cast_fp16")];
	tensor<fp16, [1, 1024, 1, 1]> input_111_cast_fp16 = mul(x = var_2212_1, y = conv_out_11_cast_fp16)[name = string("input_111_cast_fp16")];
	string y_11_pad_type_0 = const()[name = string("y_11_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> y_11_strides_0 = const()[name = string("y_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> y_11_pad_0 = const()[name = string("y_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> y_11_dilations_0 = const()[name = string("y_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 y_11_groups_0 = const()[name = string("y_11_groups_0"), val = int32(1)];
	tensor<fp16, [1024, 1024, 1, 1]> layers_7_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_7_conv_out_proj_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(825306176)))];
	tensor<fp16, [1, 1024, 1, 1]> y_11_cast_fp16 = conv(dilations = y_11_dilations_0, groups = y_11_groups_0, pad = y_11_pad_0, pad_type = y_11_pad_type_0, strides = y_11_strides_0, weight = layers_7_conv_out_proj_weight_promoted_to_fp16, x = input_111_cast_fp16)[name = string("y_11_cast_fp16")];
	tensor<int32, [1]> var_2260_axes_0 = const()[name = string("op_2260_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2260_cast_fp16 = squeeze(axes = var_2260_axes_0, x = y_11_cast_fp16)[name = string("op_2260_cast_fp16")];
	tensor<int32, [3]> var_2264 = const()[name = string("op_2264"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> op_out_15_cast_fp16 = transpose(perm = var_2264, x = var_2260_cast_fp16)[name = string("transpose_110")];
	tensor<fp16, [1, 1, 1024]> x_39_cast_fp16 = add(x = x_37_cast_fp16, y = op_out_15_cast_fp16)[name = string("x_39_cast_fp16")];
	fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_2268_cast_fp16 = mul(x = x_39_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_2268_cast_fp16")];
	int32 var_2270 = const()[name = string("op_2270"), val = int32(-1)];
	bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_113_cast_fp16 = concat(axis = var_2270, interleave = input_113_interleave_0, values = (x_39_cast_fp16, var_2268_cast_fp16))[name = string("input_113_cast_fp16")];
	tensor<int32, [1]> normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2276_to_fp16 = const()[name = string("op_2276_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_2276_to_fp16, x = input_113_cast_fp16)[name = string("normed_53_cast_fp16")];
	tensor<int32, [2]> var_2279_split_sizes_0 = const()[name = string("op_2279_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_2279_axis_0 = const()[name = string("op_2279_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_2279_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_2279_cast_fp16_1 = split(axis = var_2279_axis_0, split_sizes = var_2279_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_2279_cast_fp16")];
	tensor<fp16, [1024]> layers_7_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_7_ffn_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(827403392)))];
	tensor<fp16, [1, 1, 1024]> normed_55_cast_fp16 = mul(x = var_2279_cast_fp16_0, y = layers_7_ffn_norm_weight_promoted_to_fp16)[name = string("normed_55_cast_fp16")];
	tensor<int32, [3]> var_2285 = const()[name = string("op_2285"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2288_axes_0 = const()[name = string("op_2288_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2286_cast_fp16 = transpose(perm = var_2285, x = normed_55_cast_fp16)[name = string("transpose_109")];
	tensor<fp16, [1, 1024, 1, 1]> var_2288_cast_fp16 = expand_dims(axes = var_2288_axes_0, x = var_2286_cast_fp16)[name = string("op_2288_cast_fp16")];
	string input_117_pad_type_0 = const()[name = string("input_117_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> input_117_strides_0 = const()[name = string("input_117_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> input_117_pad_0 = const()[name = string("input_117_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> input_117_dilations_0 = const()[name = string("input_117_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 input_117_groups_0 = const()[name = string("input_117_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> input_117 = conv(dilations = input_117_dilations_0, groups = input_117_groups_0, pad = input_117_pad_0, pad_type = input_117_pad_type_0, strides = input_117_strides_0, weight = layers_7_feed_forward_w1_weight, x = var_2288_cast_fp16)[name = string("input_117")];
	string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> b_15 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = layers_7_feed_forward_w3_weight, x = var_2288_cast_fp16)[name = string("b_15")];
	tensor<fp16, [1, 4608, 1, 1]> var_2316 = silu(x = input_117)[name = string("op_2316")];
	tensor<fp16, [1, 4608, 1, 1]> input_119 = mul(x = var_2316, y = b_15)[name = string("input_119")];
	string mlp_29_pad_type_0 = const()[name = string("mlp_29_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_29_strides_0 = const()[name = string("mlp_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_29_pad_0 = const()[name = string("mlp_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_29_dilations_0 = const()[name = string("mlp_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_29_groups_0 = const()[name = string("mlp_29_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> mlp_29 = conv(dilations = mlp_29_dilations_0, groups = mlp_29_groups_0, pad = mlp_29_pad_0, pad_type = mlp_29_pad_type_0, strides = mlp_29_strides_0, weight = layers_7_feed_forward_w2_weight, x = input_119)[name = string("mlp_29")];
	tensor<int32, [1]> var_2330_axes_0 = const()[name = string("op_2330_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2330 = squeeze(axes = var_2330_axes_0, x = mlp_29)[name = string("op_2330")];
	tensor<int32, [3]> var_2334 = const()[name = string("op_2334"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> mlp_31 = transpose(perm = var_2334, x = var_2330)[name = string("transpose_108")];
	tensor<fp16, [1, 1, 1024]> x_41_cast_fp16 = add(x = x_39_cast_fp16, y = mlp_31)[name = string("x_41_cast_fp16")];
	fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_2338_cast_fp16 = mul(x = x_41_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_2338_cast_fp16")];
	int32 var_2340 = const()[name = string("op_2340"), val = int32(-1)];
	bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_121_cast_fp16 = concat(axis = var_2340, interleave = input_121_interleave_0, values = (x_41_cast_fp16, var_2338_cast_fp16))[name = string("input_121_cast_fp16")];
	tensor<int32, [1]> normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2346_to_fp16 = const()[name = string("op_2346_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_2346_to_fp16, x = input_121_cast_fp16)[name = string("normed_57_cast_fp16")];
	tensor<int32, [2]> var_2349_split_sizes_0 = const()[name = string("op_2349_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_2349_axis_0 = const()[name = string("op_2349_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_2349_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_2349_cast_fp16_1 = split(axis = var_2349_axis_0, split_sizes = var_2349_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_2349_cast_fp16")];
	tensor<fp16, [1024]> layers_8_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_8_operator_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(827405504)))];
	tensor<fp16, [1, 1, 1024]> hidden_states_17_cast_fp16 = mul(x = var_2349_cast_fp16_0, y = layers_8_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_17_cast_fp16")];
	tensor<int32, [3]> var_2355 = const()[name = string("op_2355"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2358_axes_0 = const()[name = string("op_2358_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2356_cast_fp16 = transpose(perm = var_2355, x = hidden_states_17_cast_fp16)[name = string("transpose_107")];
	tensor<fp16, [1, 1024, 1, 1]> var_2358_cast_fp16 = expand_dims(axes = var_2358_axes_0, x = var_2356_cast_fp16)[name = string("op_2358_cast_fp16")];
	string var_2374_pad_type_0 = const()[name = string("op_2374_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2374_strides_0 = const()[name = string("op_2374_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2374_pad_0 = const()[name = string("op_2374_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2374_dilations_0 = const()[name = string("op_2374_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2374_groups_0 = const()[name = string("op_2374_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> var_2374 = conv(dilations = var_2374_dilations_0, groups = var_2374_groups_0, pad = var_2374_pad_0, pad_type = var_2374_pad_type_0, strides = var_2374_strides_0, weight = layers_8_self_attn_q_proj_weight, x = var_2358_cast_fp16)[name = string("op_2374")];
	tensor<int32, [4]> var_2379 = const()[name = string("op_2379"), val = tensor<int32, [4]>([1, 16, 64, 1])];
	tensor<fp16, [1, 16, 64, 1]> var_2380 = reshape(shape = var_2379, x = var_2374)[name = string("op_2380")];
	tensor<int32, [4]> var_2385 = const()[name = string("op_2385"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	string var_2402_pad_type_0 = const()[name = string("op_2402_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2402_strides_0 = const()[name = string("op_2402_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2402_pad_0 = const()[name = string("op_2402_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2402_dilations_0 = const()[name = string("op_2402_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2402_groups_0 = const()[name = string("op_2402_groups_0"), val = int32(1)];
	tensor<fp16, [1, 512, 1, 1]> var_2402 = conv(dilations = var_2402_dilations_0, groups = var_2402_groups_0, pad = var_2402_pad_0, pad_type = var_2402_pad_type_0, strides = var_2402_strides_0, weight = layers_8_self_attn_k_proj_weight, x = var_2358_cast_fp16)[name = string("op_2402")];
	tensor<int32, [4]> var_2407 = const()[name = string("op_2407"), val = tensor<int32, [4]>([1, 8, 64, 1])];
	tensor<fp16, [1, 8, 64, 1]> var_2408 = reshape(shape = var_2407, x = var_2402)[name = string("op_2408")];
	tensor<int32, [4]> var_2413 = const()[name = string("op_2413"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	string var_2430_pad_type_0 = const()[name = string("op_2430_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2430_strides_0 = const()[name = string("op_2430_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2430_pad_0 = const()[name = string("op_2430_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2430_dilations_0 = const()[name = string("op_2430_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2430_groups_0 = const()[name = string("op_2430_groups_0"), val = int32(1)];
	tensor<fp16, [1, 512, 1, 1]> var_2430 = conv(dilations = var_2430_dilations_0, groups = var_2430_groups_0, pad = var_2430_pad_0, pad_type = var_2430_pad_type_0, strides = var_2430_strides_0, weight = layers_8_self_attn_v_proj_weight, x = var_2358_cast_fp16)[name = string("op_2430")];
	tensor<int32, [4]> var_2435 = const()[name = string("op_2435"), val = tensor<int32, [4]>([1, 8, 64, 1])];
	tensor<fp16, [1, 8, 64, 1]> var_2436 = reshape(shape = var_2435, x = var_2430)[name = string("op_2436")];
	tensor<int32, [4]> var_2441 = const()[name = string("op_2441"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	fp16 const_25_promoted = const()[name = string("const_25_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 16, 1, 64]> var_2386 = transpose(perm = var_2385, x = var_2380)[name = string("transpose_106")];
	tensor<fp16, [1, 16, 1, 64]> var_2448 = mul(x = var_2386, y = const_25_promoted)[name = string("op_2448")];
	int32 var_2450 = const()[name = string("op_2450"), val = int32(-1)];
	bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 16, 1, 128]> input_125 = concat(axis = var_2450, interleave = input_125_interleave_0, values = (var_2386, var_2448))[name = string("input_125")];
	tensor<int32, [1]> normed_59_axes_0 = const()[name = string("normed_59_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2456_to_fp16 = const()[name = string("op_2456_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 16, 1, 128]> normed_59_cast_fp16 = layer_norm(axes = normed_59_axes_0, epsilon = var_2456_to_fp16, x = input_125)[name = string("normed_59_cast_fp16")];
	tensor<int32, [2]> var_2459_split_sizes_0 = const()[name = string("op_2459_split_sizes_0"), val = tensor<int32, [2]>([64, 64])];
	int32 var_2459_axis_0 = const()[name = string("op_2459_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 64]> var_2459_0, tensor<fp16, [1, 16, 1, 64]> var_2459_1 = split(axis = var_2459_axis_0, split_sizes = var_2459_split_sizes_0, x = normed_59_cast_fp16)[name = string("op_2459")];
	tensor<fp16, [1, 16, 1, 64]> q_9 = mul(x = var_2459_0, y = layers_8_self_attn_q_layernorm_weight)[name = string("q_9")];
	fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 64]> var_2414 = transpose(perm = var_2413, x = var_2408)[name = string("transpose_105")];
	tensor<fp16, [1, 8, 1, 64]> var_2462 = mul(x = var_2414, y = const_26_promoted)[name = string("op_2462")];
	int32 var_2464 = const()[name = string("op_2464"), val = int32(-1)];
	bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 128]> input_127 = concat(axis = var_2464, interleave = input_127_interleave_0, values = (var_2414, var_2462))[name = string("input_127")];
	tensor<int32, [1]> normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2470_to_fp16 = const()[name = string("op_2470_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 8, 1, 128]> normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_2470_to_fp16, x = input_127)[name = string("normed_61_cast_fp16")];
	tensor<int32, [2]> var_2473_split_sizes_0 = const()[name = string("op_2473_split_sizes_0"), val = tensor<int32, [2]>([64, 64])];
	int32 var_2473_axis_0 = const()[name = string("op_2473_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 64]> var_2473_0, tensor<fp16, [1, 8, 1, 64]> var_2473_1 = split(axis = var_2473_axis_0, split_sizes = var_2473_split_sizes_0, x = normed_61_cast_fp16)[name = string("op_2473")];
	tensor<fp16, [1, 8, 1, 64]> k_9 = mul(x = var_2473_0, y = layers_8_self_attn_k_layernorm_weight)[name = string("k_9")];
	tensor<fp16, [1, 16, 1, 64]> var_2476 = mul(x = q_9, y = cos)[name = string("op_2476")];
	tensor<int32, [2]> var_2477_split_sizes_0 = const()[name = string("op_2477_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
	int32 var_2477_axis_0 = const()[name = string("op_2477_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 32]> var_2477_0, tensor<fp16, [1, 16, 1, 32]> var_2477_1 = split(axis = var_2477_axis_0, split_sizes = var_2477_split_sizes_0, x = q_9)[name = string("op_2477")];
	fp16 const_27_promoted = const()[name = string("const_27_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 16, 1, 32]> var_2479 = mul(x = var_2477_1, y = const_27_promoted)[name = string("op_2479")];
	int32 var_2481 = const()[name = string("op_2481"), val = int32(-1)];
	bool var_2482_interleave_0 = const()[name = string("op_2482_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 16, 1, 64]> var_2482 = concat(axis = var_2481, interleave = var_2482_interleave_0, values = (var_2479, var_2477_0))[name = string("op_2482")];
	tensor<fp16, [1, 16, 1, 64]> var_2483 = mul(x = var_2482, y = sin)[name = string("op_2483")];
	tensor<fp16, [1, 16, 1, 64]> q_11 = add(x = var_2476, y = var_2483)[name = string("q_11")];
	tensor<fp16, [1, 8, 1, 64]> var_2486 = mul(x = k_9, y = cos)[name = string("op_2486")];
	tensor<int32, [2]> var_2487_split_sizes_0 = const()[name = string("op_2487_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
	int32 var_2487_axis_0 = const()[name = string("op_2487_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 32]> var_2487_0, tensor<fp16, [1, 8, 1, 32]> var_2487_1 = split(axis = var_2487_axis_0, split_sizes = var_2487_split_sizes_0, x = k_9)[name = string("op_2487")];
	fp16 const_28_promoted = const()[name = string("const_28_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 32]> var_2489 = mul(x = var_2487_1, y = const_28_promoted)[name = string("op_2489")];
	int32 var_2491 = const()[name = string("op_2491"), val = int32(-1)];
	bool var_2492_interleave_0 = const()[name = string("op_2492_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 64]> var_2492 = concat(axis = var_2491, interleave = var_2492_interleave_0, values = (var_2489, var_2487_0))[name = string("op_2492")];
	tensor<fp16, [1, 8, 1, 64]> var_2493 = mul(x = var_2492, y = sin)[name = string("op_2493")];
	tensor<fp16, [1, 8, 1, 64]> k_11 = add(x = var_2486, y = var_2493)[name = string("k_11")];
	tensor<int32, [4]> var_2498_begin_0 = const()[name = string("op_2498_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
	tensor<int32, [4]> var_2498_end_0 = const()[name = string("op_2498_end_0"), val = tensor<int32, [4]>([3, 8, 2048, 64])];
	tensor<bool, [4]> var_2498_end_mask_0 = const()[name = string("op_2498_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_2498_squeeze_mask_0 = const()[name = string("op_2498_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [8, 2048, 64]> var_2498_cast_fp16 = slice_by_index(begin = var_2498_begin_0, end = var_2498_end_0, end_mask = var_2498_end_mask_0, squeeze_mask = var_2498_squeeze_mask_0, x = coreml_update_state_15)[name = string("op_2498_cast_fp16")];
	tensor<int32, [1]> K_cache_5_axes_0 = const()[name = string("K_cache_5_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 8, 2048, 64]> K_cache_5_cast_fp16 = expand_dims(axes = K_cache_5_axes_0, x = var_2498_cast_fp16)[name = string("K_cache_5_cast_fp16")];
	tensor<int32, [4]> var_2503_begin_0 = const()[name = string("op_2503_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
	tensor<int32, [4]> var_2503_end_0 = const()[name = string("op_2503_end_0"), val = tensor<int32, [4]>([9, 8, 2048, 64])];
	tensor<bool, [4]> var_2503_end_mask_0 = const()[name = string("op_2503_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_2503_squeeze_mask_0 = const()[name = string("op_2503_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [8, 2048, 64]> var_2503_cast_fp16 = slice_by_index(begin = var_2503_begin_0, end = var_2503_end_0, end_mask = var_2503_end_mask_0, squeeze_mask = var_2503_squeeze_mask_0, x = coreml_update_state_15)[name = string("op_2503_cast_fp16")];
	tensor<int32, [1]> V_cache_5_axes_0 = const()[name = string("V_cache_5_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 8, 2048, 64]> V_cache_5_cast_fp16 = expand_dims(axes = V_cache_5_axes_0, x = var_2503_cast_fp16)[name = string("V_cache_5_cast_fp16")];
	tensor<int32, [4]> k_b_5_reps_0 = const()[name = string("k_b_5_reps_0"), val = tensor<int32, [4]>([1, 1, 2048, 1])];
	tensor<fp16, [1, 8, 2048, 64]> k_b_5 = tile(reps = k_b_5_reps_0, x = k_11)[name = string("k_b_5")];
	tensor<int32, [4]> v_b_5_reps_0 = const()[name = string("v_b_5_reps_0"), val = tensor<int32, [4]>([1, 1, 2048, 1])];
	tensor<fp16, [1, 8, 1, 64]> var_2442 = transpose(perm = var_2441, x = var_2436)[name = string("transpose_104")];
	tensor<fp16, [1, 8, 2048, 64]> v_b_5 = tile(reps = v_b_5_reps_0, x = var_2442)[name = string("v_b_5")];
	tensor<fp16, [1, 8, 2048, 64]> var_2511_cast_fp16 = mul(x = K_cache_5_cast_fp16, y = var_1132_cast_fp16)[name = string("op_2511_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_2512_cast_fp16 = mul(x = k_b_5, y = update_mask)[name = string("op_2512_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> K_new_5_cast_fp16 = add(x = var_2511_cast_fp16, y = var_2512_cast_fp16)[name = string("K_new_5_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_2518_cast_fp16 = mul(x = V_cache_5_cast_fp16, y = var_1132_cast_fp16)[name = string("op_2518_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_2519_cast_fp16 = mul(x = v_b_5, y = update_mask)[name = string("op_2519_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> V_new_5_cast_fp16 = add(x = var_2518_cast_fp16, y = var_2519_cast_fp16)[name = string("V_new_5_cast_fp16")];
	tensor<int32, [1]> var_2523_axes_0 = const()[name = string("op_2523_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [8, 2048, 64]> var_2523_cast_fp16 = squeeze(axes = var_2523_axes_0, x = K_new_5_cast_fp16)[name = string("op_2523_cast_fp16")];
	tensor<int32, [4]> concat_16 = const()[name = string("concat_16"), val = tensor<int32, [4]>([2, 0, 0, 0])];
	tensor<int32, [4]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [12, 8, 2048, 64]> kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_16, begin_mask = kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_17, end_mask = kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_5_stride_0, update = var_2523_cast_fp16, x = coreml_update_state_15)[name = string("kv_cache_0_internal_tensor_assign_5_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_5_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_16_write_state")];
	tensor<fp16, [12, 8, 2048, 64]> coreml_update_state_16 = read_state(input = kv_cache_0)[name = string("coreml_update_state_16")];
	tensor<int32, [1]> var_2530_axes_0 = const()[name = string("op_2530_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [8, 2048, 64]> var_2530_cast_fp16 = squeeze(axes = var_2530_axes_0, x = V_new_5_cast_fp16)[name = string("op_2530_cast_fp16")];
	tensor<int32, [4]> concat_18 = const()[name = string("concat_18"), val = tensor<int32, [4]>([8, 0, 0, 0])];
	tensor<int32, [4]> concat_19 = const()[name = string("concat_19"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [12, 8, 2048, 64]> kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_18, begin_mask = kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_19, end_mask = kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_6_stride_0, update = var_2530_cast_fp16, x = coreml_update_state_16)[name = string("kv_cache_0_internal_tensor_assign_6_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_6_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_17_write_state")];
	tensor<fp16, [12, 8, 2048, 64]> coreml_update_state_17 = read_state(input = kv_cache_0)[name = string("coreml_update_state_17")];
	tensor<int32, [4]> transpose_8_perm_0 = const()[name = string("transpose_8_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_4_reps_0 = const()[name = string("tile_4_reps_0"), val = tensor<int32, [4]>([2, 1, 1, 1])];
	tensor<fp16, [8, 1, 2048, 64]> transpose_8_cast_fp16 = transpose(perm = transpose_8_perm_0, x = K_new_5_cast_fp16)[name = string("transpose_103")];
	tensor<fp16, [16, 1, 2048, 64]> tile_4_cast_fp16 = tile(reps = tile_4_reps_0, x = transpose_8_cast_fp16)[name = string("tile_4_cast_fp16")];
	tensor<int32, [5]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [5]>([2, 8, 1, 2048, 64])];
	tensor<fp16, [2, 8, 1, 2048, 64]> reshape_8_cast_fp16 = reshape(shape = concat_20, x = tile_4_cast_fp16)[name = string("reshape_8_cast_fp16")];
	tensor<int32, [5]> transpose_9_perm_0 = const()[name = string("transpose_9_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_21 = const()[name = string("concat_21"), val = tensor<int32, [4]>([-1, 1, 2048, 64])];
	tensor<fp16, [8, 2, 1, 2048, 64]> transpose_9_cast_fp16 = transpose(perm = transpose_9_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_102")];
	tensor<fp16, [16, 1, 2048, 64]> reshape_9_cast_fp16 = reshape(shape = concat_21, x = transpose_9_cast_fp16)[name = string("reshape_9_cast_fp16")];
	tensor<int32, [4]> transpose_26_perm_0 = const()[name = string("transpose_26_perm_0"), val = tensor<int32, [4]>([1, 0, 3, 2])];
	tensor<int32, [4]> transpose_10_perm_0 = const()[name = string("transpose_10_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_5_reps_0 = const()[name = string("tile_5_reps_0"), val = tensor<int32, [4]>([2, 1, 1, 1])];
	tensor<fp16, [8, 1, 2048, 64]> transpose_10_cast_fp16 = transpose(perm = transpose_10_perm_0, x = V_new_5_cast_fp16)[name = string("transpose_100")];
	tensor<fp16, [16, 1, 2048, 64]> tile_5_cast_fp16 = tile(reps = tile_5_reps_0, x = transpose_10_cast_fp16)[name = string("tile_5_cast_fp16")];
	tensor<int32, [5]> concat_22 = const()[name = string("concat_22"), val = tensor<int32, [5]>([2, 8, 1, 2048, 64])];
	tensor<fp16, [2, 8, 1, 2048, 64]> reshape_10_cast_fp16 = reshape(shape = concat_22, x = tile_5_cast_fp16)[name = string("reshape_10_cast_fp16")];
	tensor<int32, [5]> transpose_11_perm_0 = const()[name = string("transpose_11_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_23 = const()[name = string("concat_23"), val = tensor<int32, [4]>([-1, 1, 2048, 64])];
	tensor<fp16, [8, 2, 1, 2048, 64]> transpose_11_cast_fp16 = transpose(perm = transpose_11_perm_0, x = reshape_10_cast_fp16)[name = string("transpose_99")];
	tensor<fp16, [16, 1, 2048, 64]> reshape_11_cast_fp16 = reshape(shape = concat_23, x = transpose_11_cast_fp16)[name = string("reshape_11_cast_fp16")];
	tensor<int32, [4]> V_e_5_perm_0 = const()[name = string("V_e_5_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	bool var_2557_transpose_x_0 = const()[name = string("op_2557_transpose_x_0"), val = bool(false)];
	bool var_2557_transpose_y_0 = const()[name = string("op_2557_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 16, 64, 2048]> transpose_26_cast_fp16 = transpose(perm = transpose_26_perm_0, x = reshape_9_cast_fp16)[name = string("transpose_101")];
	tensor<fp16, [1, 16, 1, 2048]> var_2557_cast_fp16 = matmul(transpose_x = var_2557_transpose_x_0, transpose_y = var_2557_transpose_y_0, x = q_11, y = transpose_26_cast_fp16)[name = string("op_2557_cast_fp16")];
	fp16 var_2558_to_fp16 = const()[name = string("op_2558_to_fp16"), val = fp16(0x1p-3)];
	tensor<fp16, [1, 16, 1, 2048]> attn_13_cast_fp16 = mul(x = var_2557_cast_fp16, y = var_2558_to_fp16)[name = string("attn_13_cast_fp16")];
	tensor<fp16, [1, 16, 1, 2048]> attn_15_cast_fp16 = add(x = attn_13_cast_fp16, y = causal_mask)[name = string("attn_15_cast_fp16")];
	int32 var_2567 = const()[name = string("op_2567"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 2048]> var_2569_cast_fp16 = softmax(axis = var_2567, x = attn_15_cast_fp16)[name = string("op_2569_cast_fp16")];
	bool var_2585_transpose_x_0 = const()[name = string("op_2585_transpose_x_0"), val = bool(false)];
	bool var_2585_transpose_y_0 = const()[name = string("op_2585_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 16, 2048, 64]> V_e_5_cast_fp16 = transpose(perm = V_e_5_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_98")];
	tensor<fp16, [1, 16, 1, 64]> var_2585_cast_fp16 = matmul(transpose_x = var_2585_transpose_x_0, transpose_y = var_2585_transpose_y_0, x = var_2569_cast_fp16, y = V_e_5_cast_fp16)[name = string("op_2585_cast_fp16")];
	tensor<int32, [4]> var_2595 = const()[name = string("op_2595"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_2602 = const()[name = string("op_2602"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 16, 64]> var_2596 = transpose(perm = var_2595, x = var_2585_cast_fp16)[name = string("transpose_97")];
	tensor<fp16, [1, 1, 1024]> out_11 = reshape(shape = var_2602, x = var_2596)[name = string("out_11")];
	tensor<int32, [3]> var_2607 = const()[name = string("op_2607"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1024, 1024, 1]> squeeze_2 = const()[name = string("squeeze_2"), val = tensor<fp16, [1024, 1024, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(827407616)))];
	string var_2623_pad_type_0 = const()[name = string("op_2623_pad_type_0"), val = string("valid")];
	int32 var_2623_groups_0 = const()[name = string("op_2623_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_2623_strides_0 = const()[name = string("op_2623_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_2623_pad_0 = const()[name = string("op_2623_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_2623_dilations_0 = const()[name = string("op_2623_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 1024, 1]> var_2608 = transpose(perm = var_2607, x = out_11)[name = string("transpose_96")];
	tensor<fp16, [1, 1024, 1]> var_2623 = conv(dilations = var_2623_dilations_0, groups = var_2623_groups_0, pad = var_2623_pad_0, pad_type = var_2623_pad_type_0, strides = var_2623_strides_0, weight = squeeze_2, x = var_2608)[name = string("op_2623")];
	tensor<int32, [3]> var_2627 = const()[name = string("op_2627"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> op_out_17 = transpose(perm = var_2627, x = var_2623)[name = string("transpose_95")];
	tensor<fp16, [1, 1, 1024]> x_47_cast_fp16 = add(x = x_41_cast_fp16, y = op_out_17)[name = string("x_47_cast_fp16")];
	fp16 const_29_promoted_to_fp16 = const()[name = string("const_29_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_2631_cast_fp16 = mul(x = x_47_cast_fp16, y = const_29_promoted_to_fp16)[name = string("op_2631_cast_fp16")];
	int32 var_2633 = const()[name = string("op_2633"), val = int32(-1)];
	bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_131_cast_fp16 = concat(axis = var_2633, interleave = input_131_interleave_0, values = (x_47_cast_fp16, var_2631_cast_fp16))[name = string("input_131_cast_fp16")];
	tensor<int32, [1]> normed_63_axes_0 = const()[name = string("normed_63_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2639_to_fp16 = const()[name = string("op_2639_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_63_cast_fp16 = layer_norm(axes = normed_63_axes_0, epsilon = var_2639_to_fp16, x = input_131_cast_fp16)[name = string("normed_63_cast_fp16")];
	tensor<int32, [2]> var_2642_split_sizes_0 = const()[name = string("op_2642_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_2642_axis_0 = const()[name = string("op_2642_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_2642_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_2642_cast_fp16_1 = split(axis = var_2642_axis_0, split_sizes = var_2642_split_sizes_0, x = normed_63_cast_fp16)[name = string("op_2642_cast_fp16")];
	tensor<fp16, [1024]> layers_8_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_8_ffn_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(829504832)))];
	tensor<fp16, [1, 1, 1024]> normed_65_cast_fp16 = mul(x = var_2642_cast_fp16_0, y = layers_8_ffn_norm_weight_promoted_to_fp16)[name = string("normed_65_cast_fp16")];
	tensor<int32, [3]> var_2648 = const()[name = string("op_2648"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2651_axes_0 = const()[name = string("op_2651_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2649_cast_fp16 = transpose(perm = var_2648, x = normed_65_cast_fp16)[name = string("transpose_94")];
	tensor<fp16, [1, 1024, 1, 1]> var_2651_cast_fp16 = expand_dims(axes = var_2651_axes_0, x = var_2649_cast_fp16)[name = string("op_2651_cast_fp16")];
	string input_135_pad_type_0 = const()[name = string("input_135_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> input_135_strides_0 = const()[name = string("input_135_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> input_135_pad_0 = const()[name = string("input_135_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> input_135_dilations_0 = const()[name = string("input_135_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 input_135_groups_0 = const()[name = string("input_135_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> input_135 = conv(dilations = input_135_dilations_0, groups = input_135_groups_0, pad = input_135_pad_0, pad_type = input_135_pad_type_0, strides = input_135_strides_0, weight = layers_8_feed_forward_w1_weight, x = var_2651_cast_fp16)[name = string("input_135")];
	string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> b_17 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = layers_8_feed_forward_w3_weight, x = var_2651_cast_fp16)[name = string("b_17")];
	tensor<fp16, [1, 4608, 1, 1]> var_2679 = silu(x = input_135)[name = string("op_2679")];
	tensor<fp16, [1, 4608, 1, 1]> input_137 = mul(x = var_2679, y = b_17)[name = string("input_137")];
	string mlp_33_pad_type_0 = const()[name = string("mlp_33_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_33_strides_0 = const()[name = string("mlp_33_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_33_pad_0 = const()[name = string("mlp_33_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_33_dilations_0 = const()[name = string("mlp_33_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_33_groups_0 = const()[name = string("mlp_33_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> mlp_33 = conv(dilations = mlp_33_dilations_0, groups = mlp_33_groups_0, pad = mlp_33_pad_0, pad_type = mlp_33_pad_type_0, strides = mlp_33_strides_0, weight = layers_8_feed_forward_w2_weight, x = input_137)[name = string("mlp_33")];
	tensor<int32, [1]> var_2693_axes_0 = const()[name = string("op_2693_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2693 = squeeze(axes = var_2693_axes_0, x = mlp_33)[name = string("op_2693")];
	tensor<int32, [3]> var_2697 = const()[name = string("op_2697"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> mlp_35 = transpose(perm = var_2697, x = var_2693)[name = string("transpose_93")];
	tensor<fp16, [1, 1, 1024]> x_49_cast_fp16 = add(x = x_47_cast_fp16, y = mlp_35)[name = string("x_49_cast_fp16")];
	fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_2701_cast_fp16 = mul(x = x_49_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2701_cast_fp16")];
	int32 var_2703 = const()[name = string("op_2703"), val = int32(-1)];
	bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_139_cast_fp16 = concat(axis = var_2703, interleave = input_139_interleave_0, values = (x_49_cast_fp16, var_2701_cast_fp16))[name = string("input_139_cast_fp16")];
	tensor<int32, [1]> normed_67_axes_0 = const()[name = string("normed_67_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2709_to_fp16 = const()[name = string("op_2709_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_67_cast_fp16 = layer_norm(axes = normed_67_axes_0, epsilon = var_2709_to_fp16, x = input_139_cast_fp16)[name = string("normed_67_cast_fp16")];
	tensor<int32, [2]> var_2712_split_sizes_0 = const()[name = string("op_2712_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_2712_axis_0 = const()[name = string("op_2712_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_2712_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_2712_cast_fp16_1 = split(axis = var_2712_axis_0, split_sizes = var_2712_split_sizes_0, x = normed_67_cast_fp16)[name = string("op_2712_cast_fp16")];
	tensor<fp16, [1024]> layers_9_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_9_operator_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(829506944)))];
	tensor<fp16, [1, 1, 1024]> hidden_states_19_cast_fp16 = mul(x = var_2712_cast_fp16_0, y = layers_9_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_19_cast_fp16")];
	tensor<int32, [3]> var_2718 = const()[name = string("op_2718"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2721_axes_0 = const()[name = string("op_2721_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2719_cast_fp16 = transpose(perm = var_2718, x = hidden_states_19_cast_fp16)[name = string("transpose_92")];
	tensor<fp16, [1, 1024, 1, 1]> var_2721_cast_fp16 = expand_dims(axes = var_2721_axes_0, x = var_2719_cast_fp16)[name = string("op_2721_cast_fp16")];
	string BCx_13_pad_type_0 = const()[name = string("BCx_13_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> BCx_13_strides_0 = const()[name = string("BCx_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> BCx_13_pad_0 = const()[name = string("BCx_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> BCx_13_dilations_0 = const()[name = string("BCx_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 BCx_13_groups_0 = const()[name = string("BCx_13_groups_0"), val = int32(1)];
	tensor<fp16, [1, 3072, 1, 1]> BCx_13 = conv(dilations = BCx_13_dilations_0, groups = BCx_13_groups_0, pad = BCx_13_pad_0, pad_type = BCx_13_pad_type_0, strides = BCx_13_strides_0, weight = layers_9_conv_in_proj_weight, x = var_2721_cast_fp16)[name = string("BCx_13")];
	tensor<int32, [3]> var_2738_split_sizes_0 = const()[name = string("op_2738_split_sizes_0"), val = tensor<int32, [3]>([1024, 1024, 1024])];
	int32 var_2738_axis_0 = const()[name = string("op_2738_axis_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> var_2738_0, tensor<fp16, [1, 1024, 1, 1]> var_2738_1, tensor<fp16, [1, 1024, 1, 1]> var_2738_2 = split(axis = var_2738_axis_0, split_sizes = var_2738_split_sizes_0, x = BCx_13)[name = string("op_2738")];
	tensor<fp16, [1, 1024, 1, 1]> Bx_13 = mul(x = var_2738_0, y = var_2738_2)[name = string("Bx_13")];
	tensor<int32, [3]> var_2744_begin_0 = const()[name = string("op_2744_begin_0"), val = tensor<int32, [3]>([6, 0, 0])];
	tensor<int32, [3]> var_2744_end_0 = const()[name = string("op_2744_end_0"), val = tensor<int32, [3]>([7, 1024, 3])];
	tensor<bool, [3]> var_2744_end_mask_0 = const()[name = string("op_2744_end_mask_0"), val = tensor<bool, [3]>([false, true, true])];
	tensor<bool, [3]> var_2744_squeeze_mask_0 = const()[name = string("op_2744_squeeze_mask_0"), val = tensor<bool, [3]>([true, false, false])];
	tensor<fp16, [1024, 3]> var_2744_cast_fp16 = slice_by_index(begin = var_2744_begin_0, end = var_2744_end_0, end_mask = var_2744_end_mask_0, squeeze_mask = var_2744_squeeze_mask_0, x = conv_state_in)[name = string("op_2744_cast_fp16")];
	tensor<int32, [1]> var_2746_axes_0 = const()[name = string("op_2746_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1024, 3]> var_2746_cast_fp16 = expand_dims(axes = var_2746_axes_0, x = var_2744_cast_fp16)[name = string("op_2746_cast_fp16")];
	tensor<int32, [1]> slot_13_axes_0 = const()[name = string("slot_13_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1, 3]> slot_13_cast_fp16 = expand_dims(axes = slot_13_axes_0, x = var_2746_cast_fp16)[name = string("slot_13_cast_fp16")];
	tensor<int32, [4]> live_tail_13_begin_0 = const()[name = string("live_tail_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1])];
	tensor<int32, [4]> live_tail_13_end_0 = const()[name = string("live_tail_13_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
	tensor<bool, [4]> live_tail_13_end_mask_0 = const()[name = string("live_tail_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
	tensor<fp16, [1, 1024, 1, 2]> live_tail_13_cast_fp16 = slice_by_index(begin = live_tail_13_begin_0, end = live_tail_13_end_0, end_mask = live_tail_13_end_mask_0, x = slot_13_cast_fp16)[name = string("live_tail_13_cast_fp16")];
	int32 var_2755 = const()[name = string("op_2755"), val = int32(-1)];
	bool new_state_13_interleave_0 = const()[name = string("new_state_13_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1024, 1, 3]> new_state_13_cast_fp16 = concat(axis = var_2755, interleave = new_state_13_interleave_0, values = (live_tail_13_cast_fp16, Bx_13))[name = string("new_state_13_cast_fp16")];
	tensor<int32, [1]> var_2758_axes_0 = const()[name = string("op_2758_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1024, 1, 3]> var_2758_cast_fp16 = squeeze(axes = var_2758_axes_0, x = new_state_13_cast_fp16)[name = string("op_2758_cast_fp16")];
	tensor<int32, [1]> var_2760_axes_0 = const()[name = string("op_2760_axes_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1024, 3]> var_2760_cast_fp16 = squeeze(axes = var_2760_axes_0, x = var_2758_cast_fp16)[name = string("op_2760_cast_fp16")];
	string conv_out_13_pad_type_0 = const()[name = string("conv_out_13_pad_type_0"), val = string("valid")];
	int32 conv_out_13_groups_0 = const()[name = string("conv_out_13_groups_0"), val = int32(1024)];
	tensor<int32, [2]> conv_out_13_strides_0 = const()[name = string("conv_out_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> conv_out_13_pad_0 = const()[name = string("conv_out_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> conv_out_13_dilations_0 = const()[name = string("conv_out_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<fp16, [1024, 1, 1, 3]> layers_9_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_9_conv_conv_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(829509056)))];
	tensor<fp16, [1, 1024, 1, 1]> conv_out_13_cast_fp16 = conv(dilations = conv_out_13_dilations_0, groups = conv_out_13_groups_0, pad = conv_out_13_pad_0, pad_type = conv_out_13_pad_type_0, strides = conv_out_13_strides_0, weight = layers_9_conv_conv_weight_promoted_to_fp16, x = new_state_13_cast_fp16)[name = string("conv_out_13_cast_fp16")];
	tensor<fp16, [1, 1024, 1, 1]> input_143_cast_fp16 = mul(x = var_2738_1, y = conv_out_13_cast_fp16)[name = string("input_143_cast_fp16")];
	string y_13_pad_type_0 = const()[name = string("y_13_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> y_13_strides_0 = const()[name = string("y_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> y_13_pad_0 = const()[name = string("y_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> y_13_dilations_0 = const()[name = string("y_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 y_13_groups_0 = const()[name = string("y_13_groups_0"), val = int32(1)];
	tensor<fp16, [1024, 1024, 1, 1]> layers_9_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_9_conv_out_proj_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(829515264)))];
	tensor<fp16, [1, 1024, 1, 1]> y_13_cast_fp16 = conv(dilations = y_13_dilations_0, groups = y_13_groups_0, pad = y_13_pad_0, pad_type = y_13_pad_type_0, strides = y_13_strides_0, weight = layers_9_conv_out_proj_weight_promoted_to_fp16, x = input_143_cast_fp16)[name = string("y_13_cast_fp16")];
	tensor<int32, [1]> var_2786_axes_0 = const()[name = string("op_2786_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2786_cast_fp16 = squeeze(axes = var_2786_axes_0, x = y_13_cast_fp16)[name = string("op_2786_cast_fp16")];
	tensor<int32, [3]> var_2790 = const()[name = string("op_2790"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> op_out_19_cast_fp16 = transpose(perm = var_2790, x = var_2786_cast_fp16)[name = string("transpose_91")];
	tensor<fp16, [1, 1, 1024]> x_51_cast_fp16 = add(x = x_49_cast_fp16, y = op_out_19_cast_fp16)[name = string("x_51_cast_fp16")];
	fp16 const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_2794_cast_fp16 = mul(x = x_51_cast_fp16, y = const_31_promoted_to_fp16)[name = string("op_2794_cast_fp16")];
	int32 var_2796 = const()[name = string("op_2796"), val = int32(-1)];
	bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_145_cast_fp16 = concat(axis = var_2796, interleave = input_145_interleave_0, values = (x_51_cast_fp16, var_2794_cast_fp16))[name = string("input_145_cast_fp16")];
	tensor<int32, [1]> normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2802_to_fp16 = const()[name = string("op_2802_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_2802_to_fp16, x = input_145_cast_fp16)[name = string("normed_69_cast_fp16")];
	tensor<int32, [2]> var_2805_split_sizes_0 = const()[name = string("op_2805_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_2805_axis_0 = const()[name = string("op_2805_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_2805_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_2805_cast_fp16_1 = split(axis = var_2805_axis_0, split_sizes = var_2805_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_2805_cast_fp16")];
	tensor<fp16, [1024]> layers_9_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_9_ffn_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(831612480)))];
	tensor<fp16, [1, 1, 1024]> normed_71_cast_fp16 = mul(x = var_2805_cast_fp16_0, y = layers_9_ffn_norm_weight_promoted_to_fp16)[name = string("normed_71_cast_fp16")];
	tensor<int32, [3]> var_2811 = const()[name = string("op_2811"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2814_axes_0 = const()[name = string("op_2814_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2812_cast_fp16 = transpose(perm = var_2811, x = normed_71_cast_fp16)[name = string("transpose_90")];
	tensor<fp16, [1, 1024, 1, 1]> var_2814_cast_fp16 = expand_dims(axes = var_2814_axes_0, x = var_2812_cast_fp16)[name = string("op_2814_cast_fp16")];
	string input_149_pad_type_0 = const()[name = string("input_149_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> input_149_strides_0 = const()[name = string("input_149_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> input_149_pad_0 = const()[name = string("input_149_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> input_149_dilations_0 = const()[name = string("input_149_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 input_149_groups_0 = const()[name = string("input_149_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> input_149 = conv(dilations = input_149_dilations_0, groups = input_149_groups_0, pad = input_149_pad_0, pad_type = input_149_pad_type_0, strides = input_149_strides_0, weight = layers_9_feed_forward_w1_weight, x = var_2814_cast_fp16)[name = string("input_149")];
	string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> b_19 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = layers_9_feed_forward_w3_weight, x = var_2814_cast_fp16)[name = string("b_19")];
	tensor<fp16, [1, 4608, 1, 1]> var_2842 = silu(x = input_149)[name = string("op_2842")];
	tensor<fp16, [1, 4608, 1, 1]> input_151 = mul(x = var_2842, y = b_19)[name = string("input_151")];
	string mlp_37_pad_type_0 = const()[name = string("mlp_37_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_37_strides_0 = const()[name = string("mlp_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_37_pad_0 = const()[name = string("mlp_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_37_dilations_0 = const()[name = string("mlp_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_37_groups_0 = const()[name = string("mlp_37_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> mlp_37 = conv(dilations = mlp_37_dilations_0, groups = mlp_37_groups_0, pad = mlp_37_pad_0, pad_type = mlp_37_pad_type_0, strides = mlp_37_strides_0, weight = layers_9_feed_forward_w2_weight, x = input_151)[name = string("mlp_37")];
	tensor<int32, [1]> var_2856_axes_0 = const()[name = string("op_2856_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2856 = squeeze(axes = var_2856_axes_0, x = mlp_37)[name = string("op_2856")];
	tensor<int32, [3]> var_2860 = const()[name = string("op_2860"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> mlp_39 = transpose(perm = var_2860, x = var_2856)[name = string("transpose_89")];
	tensor<fp16, [1, 1, 1024]> x_53_cast_fp16 = add(x = x_51_cast_fp16, y = mlp_39)[name = string("x_53_cast_fp16")];
	fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_2864_cast_fp16 = mul(x = x_53_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_2864_cast_fp16")];
	int32 var_2866 = const()[name = string("op_2866"), val = int32(-1)];
	bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_153_cast_fp16 = concat(axis = var_2866, interleave = input_153_interleave_0, values = (x_53_cast_fp16, var_2864_cast_fp16))[name = string("input_153_cast_fp16")];
	tensor<int32, [1]> normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2872_to_fp16 = const()[name = string("op_2872_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_2872_to_fp16, x = input_153_cast_fp16)[name = string("normed_73_cast_fp16")];
	tensor<int32, [2]> var_2875_split_sizes_0 = const()[name = string("op_2875_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_2875_axis_0 = const()[name = string("op_2875_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_2875_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_2875_cast_fp16_1 = split(axis = var_2875_axis_0, split_sizes = var_2875_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_2875_cast_fp16")];
	tensor<fp16, [1024]> layers_10_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_10_operator_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(831614592)))];
	tensor<fp16, [1, 1, 1024]> hidden_states_21_cast_fp16 = mul(x = var_2875_cast_fp16_0, y = layers_10_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_21_cast_fp16")];
	tensor<int32, [3]> var_2881 = const()[name = string("op_2881"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2884_axes_0 = const()[name = string("op_2884_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_2882_cast_fp16 = transpose(perm = var_2881, x = hidden_states_21_cast_fp16)[name = string("transpose_88")];
	tensor<fp16, [1, 1024, 1, 1]> var_2884_cast_fp16 = expand_dims(axes = var_2884_axes_0, x = var_2882_cast_fp16)[name = string("op_2884_cast_fp16")];
	string var_2900_pad_type_0 = const()[name = string("op_2900_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2900_strides_0 = const()[name = string("op_2900_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2900_pad_0 = const()[name = string("op_2900_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2900_dilations_0 = const()[name = string("op_2900_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2900_groups_0 = const()[name = string("op_2900_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> var_2900 = conv(dilations = var_2900_dilations_0, groups = var_2900_groups_0, pad = var_2900_pad_0, pad_type = var_2900_pad_type_0, strides = var_2900_strides_0, weight = layers_10_self_attn_q_proj_weight, x = var_2884_cast_fp16)[name = string("op_2900")];
	tensor<int32, [4]> var_2905 = const()[name = string("op_2905"), val = tensor<int32, [4]>([1, 16, 64, 1])];
	tensor<fp16, [1, 16, 64, 1]> var_2906 = reshape(shape = var_2905, x = var_2900)[name = string("op_2906")];
	tensor<int32, [4]> var_2911 = const()[name = string("op_2911"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	string var_2928_pad_type_0 = const()[name = string("op_2928_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2928_strides_0 = const()[name = string("op_2928_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2928_pad_0 = const()[name = string("op_2928_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2928_dilations_0 = const()[name = string("op_2928_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2928_groups_0 = const()[name = string("op_2928_groups_0"), val = int32(1)];
	tensor<fp16, [1, 512, 1, 1]> var_2928 = conv(dilations = var_2928_dilations_0, groups = var_2928_groups_0, pad = var_2928_pad_0, pad_type = var_2928_pad_type_0, strides = var_2928_strides_0, weight = layers_10_self_attn_k_proj_weight, x = var_2884_cast_fp16)[name = string("op_2928")];
	tensor<int32, [4]> var_2933 = const()[name = string("op_2933"), val = tensor<int32, [4]>([1, 8, 64, 1])];
	tensor<fp16, [1, 8, 64, 1]> var_2934 = reshape(shape = var_2933, x = var_2928)[name = string("op_2934")];
	tensor<int32, [4]> var_2939 = const()[name = string("op_2939"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	string var_2956_pad_type_0 = const()[name = string("op_2956_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2956_strides_0 = const()[name = string("op_2956_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2956_pad_0 = const()[name = string("op_2956_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2956_dilations_0 = const()[name = string("op_2956_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2956_groups_0 = const()[name = string("op_2956_groups_0"), val = int32(1)];
	tensor<fp16, [1, 512, 1, 1]> var_2956 = conv(dilations = var_2956_dilations_0, groups = var_2956_groups_0, pad = var_2956_pad_0, pad_type = var_2956_pad_type_0, strides = var_2956_strides_0, weight = layers_10_self_attn_v_proj_weight, x = var_2884_cast_fp16)[name = string("op_2956")];
	tensor<int32, [4]> var_2961 = const()[name = string("op_2961"), val = tensor<int32, [4]>([1, 8, 64, 1])];
	tensor<fp16, [1, 8, 64, 1]> var_2962 = reshape(shape = var_2961, x = var_2956)[name = string("op_2962")];
	tensor<int32, [4]> var_2967 = const()[name = string("op_2967"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	fp16 const_33_promoted = const()[name = string("const_33_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 16, 1, 64]> var_2912 = transpose(perm = var_2911, x = var_2906)[name = string("transpose_87")];
	tensor<fp16, [1, 16, 1, 64]> var_2974 = mul(x = var_2912, y = const_33_promoted)[name = string("op_2974")];
	int32 var_2976 = const()[name = string("op_2976"), val = int32(-1)];
	bool input_157_interleave_0 = const()[name = string("input_157_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 16, 1, 128]> input_157 = concat(axis = var_2976, interleave = input_157_interleave_0, values = (var_2912, var_2974))[name = string("input_157")];
	tensor<int32, [1]> normed_75_axes_0 = const()[name = string("normed_75_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2982_to_fp16 = const()[name = string("op_2982_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 16, 1, 128]> normed_75_cast_fp16 = layer_norm(axes = normed_75_axes_0, epsilon = var_2982_to_fp16, x = input_157)[name = string("normed_75_cast_fp16")];
	tensor<int32, [2]> var_2985_split_sizes_0 = const()[name = string("op_2985_split_sizes_0"), val = tensor<int32, [2]>([64, 64])];
	int32 var_2985_axis_0 = const()[name = string("op_2985_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 64]> var_2985_0, tensor<fp16, [1, 16, 1, 64]> var_2985_1 = split(axis = var_2985_axis_0, split_sizes = var_2985_split_sizes_0, x = normed_75_cast_fp16)[name = string("op_2985")];
	tensor<fp16, [1, 16, 1, 64]> q_13 = mul(x = var_2985_0, y = layers_10_self_attn_q_layernorm_weight)[name = string("q_13")];
	fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 64]> var_2940 = transpose(perm = var_2939, x = var_2934)[name = string("transpose_86")];
	tensor<fp16, [1, 8, 1, 64]> var_2988 = mul(x = var_2940, y = const_34_promoted)[name = string("op_2988")];
	int32 var_2990 = const()[name = string("op_2990"), val = int32(-1)];
	bool input_159_interleave_0 = const()[name = string("input_159_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 128]> input_159 = concat(axis = var_2990, interleave = input_159_interleave_0, values = (var_2940, var_2988))[name = string("input_159")];
	tensor<int32, [1]> normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2996_to_fp16 = const()[name = string("op_2996_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 8, 1, 128]> normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_2996_to_fp16, x = input_159)[name = string("normed_77_cast_fp16")];
	tensor<int32, [2]> var_2999_split_sizes_0 = const()[name = string("op_2999_split_sizes_0"), val = tensor<int32, [2]>([64, 64])];
	int32 var_2999_axis_0 = const()[name = string("op_2999_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 64]> var_2999_0, tensor<fp16, [1, 8, 1, 64]> var_2999_1 = split(axis = var_2999_axis_0, split_sizes = var_2999_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_2999")];
	tensor<fp16, [1, 8, 1, 64]> k_13 = mul(x = var_2999_0, y = layers_10_self_attn_k_layernorm_weight)[name = string("k_13")];
	tensor<fp16, [1, 16, 1, 64]> var_3002 = mul(x = q_13, y = cos)[name = string("op_3002")];
	tensor<int32, [2]> var_3003_split_sizes_0 = const()[name = string("op_3003_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
	int32 var_3003_axis_0 = const()[name = string("op_3003_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 32]> var_3003_0, tensor<fp16, [1, 16, 1, 32]> var_3003_1 = split(axis = var_3003_axis_0, split_sizes = var_3003_split_sizes_0, x = q_13)[name = string("op_3003")];
	fp16 const_35_promoted = const()[name = string("const_35_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 16, 1, 32]> var_3005 = mul(x = var_3003_1, y = const_35_promoted)[name = string("op_3005")];
	int32 var_3007 = const()[name = string("op_3007"), val = int32(-1)];
	bool var_3008_interleave_0 = const()[name = string("op_3008_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 16, 1, 64]> var_3008 = concat(axis = var_3007, interleave = var_3008_interleave_0, values = (var_3005, var_3003_0))[name = string("op_3008")];
	tensor<fp16, [1, 16, 1, 64]> var_3009 = mul(x = var_3008, y = sin)[name = string("op_3009")];
	tensor<fp16, [1, 16, 1, 64]> q_15 = add(x = var_3002, y = var_3009)[name = string("q_15")];
	tensor<fp16, [1, 8, 1, 64]> var_3012 = mul(x = k_13, y = cos)[name = string("op_3012")];
	tensor<int32, [2]> var_3013_split_sizes_0 = const()[name = string("op_3013_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
	int32 var_3013_axis_0 = const()[name = string("op_3013_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 32]> var_3013_0, tensor<fp16, [1, 8, 1, 32]> var_3013_1 = split(axis = var_3013_axis_0, split_sizes = var_3013_split_sizes_0, x = k_13)[name = string("op_3013")];
	fp16 const_36_promoted = const()[name = string("const_36_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 32]> var_3015 = mul(x = var_3013_1, y = const_36_promoted)[name = string("op_3015")];
	int32 var_3017 = const()[name = string("op_3017"), val = int32(-1)];
	bool var_3018_interleave_0 = const()[name = string("op_3018_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 64]> var_3018 = concat(axis = var_3017, interleave = var_3018_interleave_0, values = (var_3015, var_3013_0))[name = string("op_3018")];
	tensor<fp16, [1, 8, 1, 64]> var_3019 = mul(x = var_3018, y = sin)[name = string("op_3019")];
	tensor<fp16, [1, 8, 1, 64]> k_15 = add(x = var_3012, y = var_3019)[name = string("k_15")];
	tensor<int32, [4]> var_3024_begin_0 = const()[name = string("op_3024_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
	tensor<int32, [4]> var_3024_end_0 = const()[name = string("op_3024_end_0"), val = tensor<int32, [4]>([4, 8, 2048, 64])];
	tensor<bool, [4]> var_3024_end_mask_0 = const()[name = string("op_3024_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_3024_squeeze_mask_0 = const()[name = string("op_3024_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [8, 2048, 64]> var_3024_cast_fp16 = slice_by_index(begin = var_3024_begin_0, end = var_3024_end_0, end_mask = var_3024_end_mask_0, squeeze_mask = var_3024_squeeze_mask_0, x = coreml_update_state_17)[name = string("op_3024_cast_fp16")];
	tensor<int32, [1]> K_cache_7_axes_0 = const()[name = string("K_cache_7_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 8, 2048, 64]> K_cache_7_cast_fp16 = expand_dims(axes = K_cache_7_axes_0, x = var_3024_cast_fp16)[name = string("K_cache_7_cast_fp16")];
	tensor<int32, [4]> var_3029_begin_0 = const()[name = string("op_3029_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
	tensor<int32, [4]> var_3029_end_0 = const()[name = string("op_3029_end_0"), val = tensor<int32, [4]>([10, 8, 2048, 64])];
	tensor<bool, [4]> var_3029_end_mask_0 = const()[name = string("op_3029_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_3029_squeeze_mask_0 = const()[name = string("op_3029_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [8, 2048, 64]> var_3029_cast_fp16 = slice_by_index(begin = var_3029_begin_0, end = var_3029_end_0, end_mask = var_3029_end_mask_0, squeeze_mask = var_3029_squeeze_mask_0, x = coreml_update_state_17)[name = string("op_3029_cast_fp16")];
	tensor<int32, [1]> V_cache_7_axes_0 = const()[name = string("V_cache_7_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 8, 2048, 64]> V_cache_7_cast_fp16 = expand_dims(axes = V_cache_7_axes_0, x = var_3029_cast_fp16)[name = string("V_cache_7_cast_fp16")];
	tensor<int32, [4]> k_b_7_reps_0 = const()[name = string("k_b_7_reps_0"), val = tensor<int32, [4]>([1, 1, 2048, 1])];
	tensor<fp16, [1, 8, 2048, 64]> k_b_7 = tile(reps = k_b_7_reps_0, x = k_15)[name = string("k_b_7")];
	tensor<int32, [4]> v_b_7_reps_0 = const()[name = string("v_b_7_reps_0"), val = tensor<int32, [4]>([1, 1, 2048, 1])];
	tensor<fp16, [1, 8, 1, 64]> var_2968 = transpose(perm = var_2967, x = var_2962)[name = string("transpose_85")];
	tensor<fp16, [1, 8, 2048, 64]> v_b_7 = tile(reps = v_b_7_reps_0, x = var_2968)[name = string("v_b_7")];
	tensor<fp16, [1, 8, 2048, 64]> var_3037_cast_fp16 = mul(x = K_cache_7_cast_fp16, y = var_1132_cast_fp16)[name = string("op_3037_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_3038_cast_fp16 = mul(x = k_b_7, y = update_mask)[name = string("op_3038_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> K_new_7_cast_fp16 = add(x = var_3037_cast_fp16, y = var_3038_cast_fp16)[name = string("K_new_7_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_3044_cast_fp16 = mul(x = V_cache_7_cast_fp16, y = var_1132_cast_fp16)[name = string("op_3044_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_3045_cast_fp16 = mul(x = v_b_7, y = update_mask)[name = string("op_3045_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> V_new_7_cast_fp16 = add(x = var_3044_cast_fp16, y = var_3045_cast_fp16)[name = string("V_new_7_cast_fp16")];
	tensor<int32, [1]> var_3049_axes_0 = const()[name = string("op_3049_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [8, 2048, 64]> var_3049_cast_fp16 = squeeze(axes = var_3049_axes_0, x = K_new_7_cast_fp16)[name = string("op_3049_cast_fp16")];
	tensor<int32, [4]> concat_24 = const()[name = string("concat_24"), val = tensor<int32, [4]>([3, 0, 0, 0])];
	tensor<int32, [4]> concat_25 = const()[name = string("concat_25"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [12, 8, 2048, 64]> kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_24, begin_mask = kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_25, end_mask = kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_7_stride_0, update = var_3049_cast_fp16, x = coreml_update_state_17)[name = string("kv_cache_0_internal_tensor_assign_7_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_7_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_18_write_state")];
	tensor<fp16, [12, 8, 2048, 64]> coreml_update_state_18 = read_state(input = kv_cache_0)[name = string("coreml_update_state_18")];
	tensor<int32, [1]> var_3056_axes_0 = const()[name = string("op_3056_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [8, 2048, 64]> var_3056_cast_fp16 = squeeze(axes = var_3056_axes_0, x = V_new_7_cast_fp16)[name = string("op_3056_cast_fp16")];
	tensor<int32, [4]> concat_26 = const()[name = string("concat_26"), val = tensor<int32, [4]>([9, 0, 0, 0])];
	tensor<int32, [4]> concat_27 = const()[name = string("concat_27"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [12, 8, 2048, 64]> kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_26, begin_mask = kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_27, end_mask = kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_8_stride_0, update = var_3056_cast_fp16, x = coreml_update_state_18)[name = string("kv_cache_0_internal_tensor_assign_8_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_8_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_19_write_state")];
	tensor<fp16, [12, 8, 2048, 64]> coreml_update_state_19 = read_state(input = kv_cache_0)[name = string("coreml_update_state_19")];
	tensor<int32, [4]> transpose_12_perm_0 = const()[name = string("transpose_12_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_6_reps_0 = const()[name = string("tile_6_reps_0"), val = tensor<int32, [4]>([2, 1, 1, 1])];
	tensor<fp16, [8, 1, 2048, 64]> transpose_12_cast_fp16 = transpose(perm = transpose_12_perm_0, x = K_new_7_cast_fp16)[name = string("transpose_84")];
	tensor<fp16, [16, 1, 2048, 64]> tile_6_cast_fp16 = tile(reps = tile_6_reps_0, x = transpose_12_cast_fp16)[name = string("tile_6_cast_fp16")];
	tensor<int32, [5]> concat_28 = const()[name = string("concat_28"), val = tensor<int32, [5]>([2, 8, 1, 2048, 64])];
	tensor<fp16, [2, 8, 1, 2048, 64]> reshape_12_cast_fp16 = reshape(shape = concat_28, x = tile_6_cast_fp16)[name = string("reshape_12_cast_fp16")];
	tensor<int32, [5]> transpose_13_perm_0 = const()[name = string("transpose_13_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_29 = const()[name = string("concat_29"), val = tensor<int32, [4]>([-1, 1, 2048, 64])];
	tensor<fp16, [8, 2, 1, 2048, 64]> transpose_13_cast_fp16 = transpose(perm = transpose_13_perm_0, x = reshape_12_cast_fp16)[name = string("transpose_83")];
	tensor<fp16, [16, 1, 2048, 64]> reshape_13_cast_fp16 = reshape(shape = concat_29, x = transpose_13_cast_fp16)[name = string("reshape_13_cast_fp16")];
	tensor<int32, [4]> transpose_27_perm_0 = const()[name = string("transpose_27_perm_0"), val = tensor<int32, [4]>([1, 0, 3, 2])];
	tensor<int32, [4]> transpose_14_perm_0 = const()[name = string("transpose_14_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_7_reps_0 = const()[name = string("tile_7_reps_0"), val = tensor<int32, [4]>([2, 1, 1, 1])];
	tensor<fp16, [8, 1, 2048, 64]> transpose_14_cast_fp16 = transpose(perm = transpose_14_perm_0, x = V_new_7_cast_fp16)[name = string("transpose_81")];
	tensor<fp16, [16, 1, 2048, 64]> tile_7_cast_fp16 = tile(reps = tile_7_reps_0, x = transpose_14_cast_fp16)[name = string("tile_7_cast_fp16")];
	tensor<int32, [5]> concat_30 = const()[name = string("concat_30"), val = tensor<int32, [5]>([2, 8, 1, 2048, 64])];
	tensor<fp16, [2, 8, 1, 2048, 64]> reshape_14_cast_fp16 = reshape(shape = concat_30, x = tile_7_cast_fp16)[name = string("reshape_14_cast_fp16")];
	tensor<int32, [5]> transpose_15_perm_0 = const()[name = string("transpose_15_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_31 = const()[name = string("concat_31"), val = tensor<int32, [4]>([-1, 1, 2048, 64])];
	tensor<fp16, [8, 2, 1, 2048, 64]> transpose_15_cast_fp16 = transpose(perm = transpose_15_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_80")];
	tensor<fp16, [16, 1, 2048, 64]> reshape_15_cast_fp16 = reshape(shape = concat_31, x = transpose_15_cast_fp16)[name = string("reshape_15_cast_fp16")];
	tensor<int32, [4]> V_e_7_perm_0 = const()[name = string("V_e_7_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	bool var_3083_transpose_x_0 = const()[name = string("op_3083_transpose_x_0"), val = bool(false)];
	bool var_3083_transpose_y_0 = const()[name = string("op_3083_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 16, 64, 2048]> transpose_27_cast_fp16 = transpose(perm = transpose_27_perm_0, x = reshape_13_cast_fp16)[name = string("transpose_82")];
	tensor<fp16, [1, 16, 1, 2048]> var_3083_cast_fp16 = matmul(transpose_x = var_3083_transpose_x_0, transpose_y = var_3083_transpose_y_0, x = q_15, y = transpose_27_cast_fp16)[name = string("op_3083_cast_fp16")];
	fp16 var_3084_to_fp16 = const()[name = string("op_3084_to_fp16"), val = fp16(0x1p-3)];
	tensor<fp16, [1, 16, 1, 2048]> attn_19_cast_fp16 = mul(x = var_3083_cast_fp16, y = var_3084_to_fp16)[name = string("attn_19_cast_fp16")];
	tensor<fp16, [1, 16, 1, 2048]> attn_21_cast_fp16 = add(x = attn_19_cast_fp16, y = causal_mask)[name = string("attn_21_cast_fp16")];
	int32 var_3093 = const()[name = string("op_3093"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 2048]> var_3095_cast_fp16 = softmax(axis = var_3093, x = attn_21_cast_fp16)[name = string("op_3095_cast_fp16")];
	bool var_3111_transpose_x_0 = const()[name = string("op_3111_transpose_x_0"), val = bool(false)];
	bool var_3111_transpose_y_0 = const()[name = string("op_3111_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 16, 2048, 64]> V_e_7_cast_fp16 = transpose(perm = V_e_7_perm_0, x = reshape_15_cast_fp16)[name = string("transpose_79")];
	tensor<fp16, [1, 16, 1, 64]> var_3111_cast_fp16 = matmul(transpose_x = var_3111_transpose_x_0, transpose_y = var_3111_transpose_y_0, x = var_3095_cast_fp16, y = V_e_7_cast_fp16)[name = string("op_3111_cast_fp16")];
	tensor<int32, [4]> var_3121 = const()[name = string("op_3121"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_3128 = const()[name = string("op_3128"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 16, 64]> var_3122 = transpose(perm = var_3121, x = var_3111_cast_fp16)[name = string("transpose_78")];
	tensor<fp16, [1, 1, 1024]> out_15 = reshape(shape = var_3128, x = var_3122)[name = string("out_15")];
	tensor<int32, [3]> var_3133 = const()[name = string("op_3133"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1024, 1024, 1]> squeeze_3 = const()[name = string("squeeze_3"), val = tensor<fp16, [1024, 1024, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(831616704)))];
	string var_3149_pad_type_0 = const()[name = string("op_3149_pad_type_0"), val = string("valid")];
	int32 var_3149_groups_0 = const()[name = string("op_3149_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_3149_strides_0 = const()[name = string("op_3149_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_3149_pad_0 = const()[name = string("op_3149_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_3149_dilations_0 = const()[name = string("op_3149_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 1024, 1]> var_3134 = transpose(perm = var_3133, x = out_15)[name = string("transpose_77")];
	tensor<fp16, [1, 1024, 1]> var_3149 = conv(dilations = var_3149_dilations_0, groups = var_3149_groups_0, pad = var_3149_pad_0, pad_type = var_3149_pad_type_0, strides = var_3149_strides_0, weight = squeeze_3, x = var_3134)[name = string("op_3149")];
	tensor<int32, [3]> var_3153 = const()[name = string("op_3153"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> op_out_21 = transpose(perm = var_3153, x = var_3149)[name = string("transpose_76")];
	tensor<fp16, [1, 1, 1024]> x_59_cast_fp16 = add(x = x_53_cast_fp16, y = op_out_21)[name = string("x_59_cast_fp16")];
	fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_3157_cast_fp16 = mul(x = x_59_cast_fp16, y = const_37_promoted_to_fp16)[name = string("op_3157_cast_fp16")];
	int32 var_3159 = const()[name = string("op_3159"), val = int32(-1)];
	bool input_163_interleave_0 = const()[name = string("input_163_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_163_cast_fp16 = concat(axis = var_3159, interleave = input_163_interleave_0, values = (x_59_cast_fp16, var_3157_cast_fp16))[name = string("input_163_cast_fp16")];
	tensor<int32, [1]> normed_79_axes_0 = const()[name = string("normed_79_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3165_to_fp16 = const()[name = string("op_3165_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_79_cast_fp16 = layer_norm(axes = normed_79_axes_0, epsilon = var_3165_to_fp16, x = input_163_cast_fp16)[name = string("normed_79_cast_fp16")];
	tensor<int32, [2]> var_3168_split_sizes_0 = const()[name = string("op_3168_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_3168_axis_0 = const()[name = string("op_3168_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_3168_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_3168_cast_fp16_1 = split(axis = var_3168_axis_0, split_sizes = var_3168_split_sizes_0, x = normed_79_cast_fp16)[name = string("op_3168_cast_fp16")];
	tensor<fp16, [1024]> layers_10_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_10_ffn_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833713920)))];
	tensor<fp16, [1, 1, 1024]> normed_81_cast_fp16 = mul(x = var_3168_cast_fp16_0, y = layers_10_ffn_norm_weight_promoted_to_fp16)[name = string("normed_81_cast_fp16")];
	tensor<int32, [3]> var_3174 = const()[name = string("op_3174"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_3177_axes_0 = const()[name = string("op_3177_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_3175_cast_fp16 = transpose(perm = var_3174, x = normed_81_cast_fp16)[name = string("transpose_75")];
	tensor<fp16, [1, 1024, 1, 1]> var_3177_cast_fp16 = expand_dims(axes = var_3177_axes_0, x = var_3175_cast_fp16)[name = string("op_3177_cast_fp16")];
	string input_167_pad_type_0 = const()[name = string("input_167_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> input_167_strides_0 = const()[name = string("input_167_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> input_167_pad_0 = const()[name = string("input_167_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> input_167_dilations_0 = const()[name = string("input_167_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 input_167_groups_0 = const()[name = string("input_167_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> input_167 = conv(dilations = input_167_dilations_0, groups = input_167_groups_0, pad = input_167_pad_0, pad_type = input_167_pad_type_0, strides = input_167_strides_0, weight = layers_10_feed_forward_w1_weight, x = var_3177_cast_fp16)[name = string("input_167")];
	string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> b_21 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = layers_10_feed_forward_w3_weight, x = var_3177_cast_fp16)[name = string("b_21")];
	tensor<fp16, [1, 4608, 1, 1]> var_3205 = silu(x = input_167)[name = string("op_3205")];
	tensor<fp16, [1, 4608, 1, 1]> input_169 = mul(x = var_3205, y = b_21)[name = string("input_169")];
	string mlp_41_pad_type_0 = const()[name = string("mlp_41_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_41_strides_0 = const()[name = string("mlp_41_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_41_pad_0 = const()[name = string("mlp_41_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_41_dilations_0 = const()[name = string("mlp_41_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_41_groups_0 = const()[name = string("mlp_41_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> mlp_41 = conv(dilations = mlp_41_dilations_0, groups = mlp_41_groups_0, pad = mlp_41_pad_0, pad_type = mlp_41_pad_type_0, strides = mlp_41_strides_0, weight = layers_10_feed_forward_w2_weight, x = input_169)[name = string("mlp_41")];
	tensor<int32, [1]> var_3219_axes_0 = const()[name = string("op_3219_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_3219 = squeeze(axes = var_3219_axes_0, x = mlp_41)[name = string("op_3219")];
	tensor<int32, [3]> var_3223 = const()[name = string("op_3223"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> mlp_43 = transpose(perm = var_3223, x = var_3219)[name = string("transpose_74")];
	tensor<fp16, [1, 1, 1024]> x_61_cast_fp16 = add(x = x_59_cast_fp16, y = mlp_43)[name = string("x_61_cast_fp16")];
	fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_3227_cast_fp16 = mul(x = x_61_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_3227_cast_fp16")];
	int32 var_3229 = const()[name = string("op_3229"), val = int32(-1)];
	bool input_171_interleave_0 = const()[name = string("input_171_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_171_cast_fp16 = concat(axis = var_3229, interleave = input_171_interleave_0, values = (x_61_cast_fp16, var_3227_cast_fp16))[name = string("input_171_cast_fp16")];
	tensor<int32, [1]> normed_83_axes_0 = const()[name = string("normed_83_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3235_to_fp16 = const()[name = string("op_3235_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_83_cast_fp16 = layer_norm(axes = normed_83_axes_0, epsilon = var_3235_to_fp16, x = input_171_cast_fp16)[name = string("normed_83_cast_fp16")];
	tensor<int32, [2]> var_3238_split_sizes_0 = const()[name = string("op_3238_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_3238_axis_0 = const()[name = string("op_3238_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_3238_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_3238_cast_fp16_1 = split(axis = var_3238_axis_0, split_sizes = var_3238_split_sizes_0, x = normed_83_cast_fp16)[name = string("op_3238_cast_fp16")];
	tensor<fp16, [1024]> layers_11_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_11_operator_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833716032)))];
	tensor<fp16, [1, 1, 1024]> hidden_states_23_cast_fp16 = mul(x = var_3238_cast_fp16_0, y = layers_11_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_23_cast_fp16")];
	tensor<int32, [3]> var_3244 = const()[name = string("op_3244"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_3247_axes_0 = const()[name = string("op_3247_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_3245_cast_fp16 = transpose(perm = var_3244, x = hidden_states_23_cast_fp16)[name = string("transpose_73")];
	tensor<fp16, [1, 1024, 1, 1]> var_3247_cast_fp16 = expand_dims(axes = var_3247_axes_0, x = var_3245_cast_fp16)[name = string("op_3247_cast_fp16")];
	string BCx_15_pad_type_0 = const()[name = string("BCx_15_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> BCx_15_strides_0 = const()[name = string("BCx_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> BCx_15_pad_0 = const()[name = string("BCx_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> BCx_15_dilations_0 = const()[name = string("BCx_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 BCx_15_groups_0 = const()[name = string("BCx_15_groups_0"), val = int32(1)];
	tensor<fp16, [1, 3072, 1, 1]> BCx_15 = conv(dilations = BCx_15_dilations_0, groups = BCx_15_groups_0, pad = BCx_15_pad_0, pad_type = BCx_15_pad_type_0, strides = BCx_15_strides_0, weight = layers_11_conv_in_proj_weight, x = var_3247_cast_fp16)[name = string("BCx_15")];
	tensor<int32, [3]> var_3264_split_sizes_0 = const()[name = string("op_3264_split_sizes_0"), val = tensor<int32, [3]>([1024, 1024, 1024])];
	int32 var_3264_axis_0 = const()[name = string("op_3264_axis_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> var_3264_0, tensor<fp16, [1, 1024, 1, 1]> var_3264_1, tensor<fp16, [1, 1024, 1, 1]> var_3264_2 = split(axis = var_3264_axis_0, split_sizes = var_3264_split_sizes_0, x = BCx_15)[name = string("op_3264")];
	tensor<fp16, [1, 1024, 1, 1]> Bx_15 = mul(x = var_3264_0, y = var_3264_2)[name = string("Bx_15")];
	tensor<int32, [3]> var_3270_begin_0 = const()[name = string("op_3270_begin_0"), val = tensor<int32, [3]>([7, 0, 0])];
	tensor<int32, [3]> var_3270_end_0 = const()[name = string("op_3270_end_0"), val = tensor<int32, [3]>([8, 1024, 3])];
	tensor<bool, [3]> var_3270_end_mask_0 = const()[name = string("op_3270_end_mask_0"), val = tensor<bool, [3]>([false, true, true])];
	tensor<bool, [3]> var_3270_squeeze_mask_0 = const()[name = string("op_3270_squeeze_mask_0"), val = tensor<bool, [3]>([true, false, false])];
	tensor<fp16, [1024, 3]> var_3270_cast_fp16 = slice_by_index(begin = var_3270_begin_0, end = var_3270_end_0, end_mask = var_3270_end_mask_0, squeeze_mask = var_3270_squeeze_mask_0, x = conv_state_in)[name = string("op_3270_cast_fp16")];
	tensor<int32, [1]> var_3272_axes_0 = const()[name = string("op_3272_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1024, 3]> var_3272_cast_fp16 = expand_dims(axes = var_3272_axes_0, x = var_3270_cast_fp16)[name = string("op_3272_cast_fp16")];
	tensor<int32, [1]> slot_15_axes_0 = const()[name = string("slot_15_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1, 3]> slot_15_cast_fp16 = expand_dims(axes = slot_15_axes_0, x = var_3272_cast_fp16)[name = string("slot_15_cast_fp16")];
	tensor<int32, [4]> live_tail_15_begin_0 = const()[name = string("live_tail_15_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1])];
	tensor<int32, [4]> live_tail_15_end_0 = const()[name = string("live_tail_15_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
	tensor<bool, [4]> live_tail_15_end_mask_0 = const()[name = string("live_tail_15_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
	tensor<fp16, [1, 1024, 1, 2]> live_tail_15_cast_fp16 = slice_by_index(begin = live_tail_15_begin_0, end = live_tail_15_end_0, end_mask = live_tail_15_end_mask_0, x = slot_15_cast_fp16)[name = string("live_tail_15_cast_fp16")];
	int32 var_3281 = const()[name = string("op_3281"), val = int32(-1)];
	bool new_state_15_interleave_0 = const()[name = string("new_state_15_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1024, 1, 3]> new_state_15_cast_fp16 = concat(axis = var_3281, interleave = new_state_15_interleave_0, values = (live_tail_15_cast_fp16, Bx_15))[name = string("new_state_15_cast_fp16")];
	tensor<int32, [1]> var_3284_axes_0 = const()[name = string("op_3284_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1024, 1, 3]> var_3284_cast_fp16 = squeeze(axes = var_3284_axes_0, x = new_state_15_cast_fp16)[name = string("op_3284_cast_fp16")];
	tensor<int32, [1]> var_3286_axes_0 = const()[name = string("op_3286_axes_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1024, 3]> var_3286_cast_fp16 = squeeze(axes = var_3286_axes_0, x = var_3284_cast_fp16)[name = string("op_3286_cast_fp16")];
	string conv_out_15_pad_type_0 = const()[name = string("conv_out_15_pad_type_0"), val = string("valid")];
	int32 conv_out_15_groups_0 = const()[name = string("conv_out_15_groups_0"), val = int32(1024)];
	tensor<int32, [2]> conv_out_15_strides_0 = const()[name = string("conv_out_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> conv_out_15_pad_0 = const()[name = string("conv_out_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> conv_out_15_dilations_0 = const()[name = string("conv_out_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<fp16, [1024, 1, 1, 3]> layers_11_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_11_conv_conv_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833718144)))];
	tensor<fp16, [1, 1024, 1, 1]> conv_out_15_cast_fp16 = conv(dilations = conv_out_15_dilations_0, groups = conv_out_15_groups_0, pad = conv_out_15_pad_0, pad_type = conv_out_15_pad_type_0, strides = conv_out_15_strides_0, weight = layers_11_conv_conv_weight_promoted_to_fp16, x = new_state_15_cast_fp16)[name = string("conv_out_15_cast_fp16")];
	tensor<fp16, [1, 1024, 1, 1]> input_175_cast_fp16 = mul(x = var_3264_1, y = conv_out_15_cast_fp16)[name = string("input_175_cast_fp16")];
	string y_15_pad_type_0 = const()[name = string("y_15_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> y_15_strides_0 = const()[name = string("y_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> y_15_pad_0 = const()[name = string("y_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> y_15_dilations_0 = const()[name = string("y_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 y_15_groups_0 = const()[name = string("y_15_groups_0"), val = int32(1)];
	tensor<fp16, [1024, 1024, 1, 1]> layers_11_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_11_conv_out_proj_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833724352)))];
	tensor<fp16, [1, 1024, 1, 1]> y_15_cast_fp16 = conv(dilations = y_15_dilations_0, groups = y_15_groups_0, pad = y_15_pad_0, pad_type = y_15_pad_type_0, strides = y_15_strides_0, weight = layers_11_conv_out_proj_weight_promoted_to_fp16, x = input_175_cast_fp16)[name = string("y_15_cast_fp16")];
	tensor<int32, [1]> var_3312_axes_0 = const()[name = string("op_3312_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_3312_cast_fp16 = squeeze(axes = var_3312_axes_0, x = y_15_cast_fp16)[name = string("op_3312_cast_fp16")];
	tensor<int32, [3]> var_3316 = const()[name = string("op_3316"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> op_out_23_cast_fp16 = transpose(perm = var_3316, x = var_3312_cast_fp16)[name = string("transpose_72")];
	tensor<fp16, [1, 1, 1024]> x_63_cast_fp16 = add(x = x_61_cast_fp16, y = op_out_23_cast_fp16)[name = string("x_63_cast_fp16")];
	fp16 const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_3320_cast_fp16 = mul(x = x_63_cast_fp16, y = const_39_promoted_to_fp16)[name = string("op_3320_cast_fp16")];
	int32 var_3322 = const()[name = string("op_3322"), val = int32(-1)];
	bool input_177_interleave_0 = const()[name = string("input_177_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_177_cast_fp16 = concat(axis = var_3322, interleave = input_177_interleave_0, values = (x_63_cast_fp16, var_3320_cast_fp16))[name = string("input_177_cast_fp16")];
	tensor<int32, [1]> normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3328_to_fp16 = const()[name = string("op_3328_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_3328_to_fp16, x = input_177_cast_fp16)[name = string("normed_85_cast_fp16")];
	tensor<int32, [2]> var_3331_split_sizes_0 = const()[name = string("op_3331_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_3331_axis_0 = const()[name = string("op_3331_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_3331_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_3331_cast_fp16_1 = split(axis = var_3331_axis_0, split_sizes = var_3331_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_3331_cast_fp16")];
	tensor<fp16, [1024]> layers_11_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_11_ffn_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835821568)))];
	tensor<fp16, [1, 1, 1024]> normed_87_cast_fp16 = mul(x = var_3331_cast_fp16_0, y = layers_11_ffn_norm_weight_promoted_to_fp16)[name = string("normed_87_cast_fp16")];
	tensor<int32, [3]> var_3337 = const()[name = string("op_3337"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_3340_axes_0 = const()[name = string("op_3340_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_3338_cast_fp16 = transpose(perm = var_3337, x = normed_87_cast_fp16)[name = string("transpose_71")];
	tensor<fp16, [1, 1024, 1, 1]> var_3340_cast_fp16 = expand_dims(axes = var_3340_axes_0, x = var_3338_cast_fp16)[name = string("op_3340_cast_fp16")];
	string input_181_pad_type_0 = const()[name = string("input_181_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> input_181_strides_0 = const()[name = string("input_181_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> input_181_pad_0 = const()[name = string("input_181_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> input_181_dilations_0 = const()[name = string("input_181_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 input_181_groups_0 = const()[name = string("input_181_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> input_181 = conv(dilations = input_181_dilations_0, groups = input_181_groups_0, pad = input_181_pad_0, pad_type = input_181_pad_type_0, strides = input_181_strides_0, weight = layers_11_feed_forward_w1_weight, x = var_3340_cast_fp16)[name = string("input_181")];
	string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> b_23 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = layers_11_feed_forward_w3_weight, x = var_3340_cast_fp16)[name = string("b_23")];
	tensor<fp16, [1, 4608, 1, 1]> var_3368 = silu(x = input_181)[name = string("op_3368")];
	tensor<fp16, [1, 4608, 1, 1]> input_183 = mul(x = var_3368, y = b_23)[name = string("input_183")];
	string mlp_45_pad_type_0 = const()[name = string("mlp_45_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_45_strides_0 = const()[name = string("mlp_45_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_45_pad_0 = const()[name = string("mlp_45_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_45_dilations_0 = const()[name = string("mlp_45_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_45_groups_0 = const()[name = string("mlp_45_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> mlp_45 = conv(dilations = mlp_45_dilations_0, groups = mlp_45_groups_0, pad = mlp_45_pad_0, pad_type = mlp_45_pad_type_0, strides = mlp_45_strides_0, weight = layers_11_feed_forward_w2_weight, x = input_183)[name = string("mlp_45")];
	tensor<int32, [1]> var_3382_axes_0 = const()[name = string("op_3382_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_3382 = squeeze(axes = var_3382_axes_0, x = mlp_45)[name = string("op_3382")];
	tensor<int32, [3]> var_3386 = const()[name = string("op_3386"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> mlp_47 = transpose(perm = var_3386, x = var_3382)[name = string("transpose_70")];
	tensor<fp16, [1, 1, 1024]> x_65_cast_fp16 = add(x = x_63_cast_fp16, y = mlp_47)[name = string("x_65_cast_fp16")];
	fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_3390_cast_fp16 = mul(x = x_65_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_3390_cast_fp16")];
	int32 var_3392 = const()[name = string("op_3392"), val = int32(-1)];
	bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_185_cast_fp16 = concat(axis = var_3392, interleave = input_185_interleave_0, values = (x_65_cast_fp16, var_3390_cast_fp16))[name = string("input_185_cast_fp16")];
	tensor<int32, [1]> normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3398_to_fp16 = const()[name = string("op_3398_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_3398_to_fp16, x = input_185_cast_fp16)[name = string("normed_89_cast_fp16")];
	tensor<int32, [2]> var_3401_split_sizes_0 = const()[name = string("op_3401_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_3401_axis_0 = const()[name = string("op_3401_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_3401_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_3401_cast_fp16_1 = split(axis = var_3401_axis_0, split_sizes = var_3401_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_3401_cast_fp16")];
	tensor<fp16, [1024]> layers_12_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_12_operator_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835823680)))];
	tensor<fp16, [1, 1, 1024]> hidden_states_25_cast_fp16 = mul(x = var_3401_cast_fp16_0, y = layers_12_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_25_cast_fp16")];
	tensor<int32, [3]> var_3407 = const()[name = string("op_3407"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_3410_axes_0 = const()[name = string("op_3410_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_3408_cast_fp16 = transpose(perm = var_3407, x = hidden_states_25_cast_fp16)[name = string("transpose_69")];
	tensor<fp16, [1, 1024, 1, 1]> var_3410_cast_fp16 = expand_dims(axes = var_3410_axes_0, x = var_3408_cast_fp16)[name = string("op_3410_cast_fp16")];
	string var_3426_pad_type_0 = const()[name = string("op_3426_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3426_strides_0 = const()[name = string("op_3426_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3426_pad_0 = const()[name = string("op_3426_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3426_dilations_0 = const()[name = string("op_3426_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3426_groups_0 = const()[name = string("op_3426_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> var_3426 = conv(dilations = var_3426_dilations_0, groups = var_3426_groups_0, pad = var_3426_pad_0, pad_type = var_3426_pad_type_0, strides = var_3426_strides_0, weight = layers_12_self_attn_q_proj_weight, x = var_3410_cast_fp16)[name = string("op_3426")];
	tensor<int32, [4]> var_3431 = const()[name = string("op_3431"), val = tensor<int32, [4]>([1, 16, 64, 1])];
	tensor<fp16, [1, 16, 64, 1]> var_3432 = reshape(shape = var_3431, x = var_3426)[name = string("op_3432")];
	tensor<int32, [4]> var_3437 = const()[name = string("op_3437"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	string var_3454_pad_type_0 = const()[name = string("op_3454_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3454_strides_0 = const()[name = string("op_3454_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3454_pad_0 = const()[name = string("op_3454_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3454_dilations_0 = const()[name = string("op_3454_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3454_groups_0 = const()[name = string("op_3454_groups_0"), val = int32(1)];
	tensor<fp16, [1, 512, 1, 1]> var_3454 = conv(dilations = var_3454_dilations_0, groups = var_3454_groups_0, pad = var_3454_pad_0, pad_type = var_3454_pad_type_0, strides = var_3454_strides_0, weight = layers_12_self_attn_k_proj_weight, x = var_3410_cast_fp16)[name = string("op_3454")];
	tensor<int32, [4]> var_3459 = const()[name = string("op_3459"), val = tensor<int32, [4]>([1, 8, 64, 1])];
	tensor<fp16, [1, 8, 64, 1]> var_3460 = reshape(shape = var_3459, x = var_3454)[name = string("op_3460")];
	tensor<int32, [4]> var_3465 = const()[name = string("op_3465"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	string var_3482_pad_type_0 = const()[name = string("op_3482_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3482_strides_0 = const()[name = string("op_3482_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3482_pad_0 = const()[name = string("op_3482_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3482_dilations_0 = const()[name = string("op_3482_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3482_groups_0 = const()[name = string("op_3482_groups_0"), val = int32(1)];
	tensor<fp16, [1, 512, 1, 1]> var_3482 = conv(dilations = var_3482_dilations_0, groups = var_3482_groups_0, pad = var_3482_pad_0, pad_type = var_3482_pad_type_0, strides = var_3482_strides_0, weight = layers_12_self_attn_v_proj_weight, x = var_3410_cast_fp16)[name = string("op_3482")];
	tensor<int32, [4]> var_3487 = const()[name = string("op_3487"), val = tensor<int32, [4]>([1, 8, 64, 1])];
	tensor<fp16, [1, 8, 64, 1]> var_3488 = reshape(shape = var_3487, x = var_3482)[name = string("op_3488")];
	tensor<int32, [4]> var_3493 = const()[name = string("op_3493"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	fp16 const_41_promoted = const()[name = string("const_41_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 16, 1, 64]> var_3438 = transpose(perm = var_3437, x = var_3432)[name = string("transpose_68")];
	tensor<fp16, [1, 16, 1, 64]> var_3500 = mul(x = var_3438, y = const_41_promoted)[name = string("op_3500")];
	int32 var_3502 = const()[name = string("op_3502"), val = int32(-1)];
	bool input_189_interleave_0 = const()[name = string("input_189_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 16, 1, 128]> input_189 = concat(axis = var_3502, interleave = input_189_interleave_0, values = (var_3438, var_3500))[name = string("input_189")];
	tensor<int32, [1]> normed_91_axes_0 = const()[name = string("normed_91_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3508_to_fp16 = const()[name = string("op_3508_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 16, 1, 128]> normed_91_cast_fp16 = layer_norm(axes = normed_91_axes_0, epsilon = var_3508_to_fp16, x = input_189)[name = string("normed_91_cast_fp16")];
	tensor<int32, [2]> var_3511_split_sizes_0 = const()[name = string("op_3511_split_sizes_0"), val = tensor<int32, [2]>([64, 64])];
	int32 var_3511_axis_0 = const()[name = string("op_3511_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 64]> var_3511_0, tensor<fp16, [1, 16, 1, 64]> var_3511_1 = split(axis = var_3511_axis_0, split_sizes = var_3511_split_sizes_0, x = normed_91_cast_fp16)[name = string("op_3511")];
	tensor<fp16, [1, 16, 1, 64]> q_17 = mul(x = var_3511_0, y = layers_12_self_attn_q_layernorm_weight)[name = string("q_17")];
	fp16 const_42_promoted = const()[name = string("const_42_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 64]> var_3466 = transpose(perm = var_3465, x = var_3460)[name = string("transpose_67")];
	tensor<fp16, [1, 8, 1, 64]> var_3514 = mul(x = var_3466, y = const_42_promoted)[name = string("op_3514")];
	int32 var_3516 = const()[name = string("op_3516"), val = int32(-1)];
	bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 128]> input_191 = concat(axis = var_3516, interleave = input_191_interleave_0, values = (var_3466, var_3514))[name = string("input_191")];
	tensor<int32, [1]> normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3522_to_fp16 = const()[name = string("op_3522_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 8, 1, 128]> normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_3522_to_fp16, x = input_191)[name = string("normed_93_cast_fp16")];
	tensor<int32, [2]> var_3525_split_sizes_0 = const()[name = string("op_3525_split_sizes_0"), val = tensor<int32, [2]>([64, 64])];
	int32 var_3525_axis_0 = const()[name = string("op_3525_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 64]> var_3525_0, tensor<fp16, [1, 8, 1, 64]> var_3525_1 = split(axis = var_3525_axis_0, split_sizes = var_3525_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_3525")];
	tensor<fp16, [1, 8, 1, 64]> k_17 = mul(x = var_3525_0, y = layers_12_self_attn_k_layernorm_weight)[name = string("k_17")];
	tensor<fp16, [1, 16, 1, 64]> var_3528 = mul(x = q_17, y = cos)[name = string("op_3528")];
	tensor<int32, [2]> var_3529_split_sizes_0 = const()[name = string("op_3529_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
	int32 var_3529_axis_0 = const()[name = string("op_3529_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 32]> var_3529_0, tensor<fp16, [1, 16, 1, 32]> var_3529_1 = split(axis = var_3529_axis_0, split_sizes = var_3529_split_sizes_0, x = q_17)[name = string("op_3529")];
	fp16 const_43_promoted = const()[name = string("const_43_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 16, 1, 32]> var_3531 = mul(x = var_3529_1, y = const_43_promoted)[name = string("op_3531")];
	int32 var_3533 = const()[name = string("op_3533"), val = int32(-1)];
	bool var_3534_interleave_0 = const()[name = string("op_3534_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 16, 1, 64]> var_3534 = concat(axis = var_3533, interleave = var_3534_interleave_0, values = (var_3531, var_3529_0))[name = string("op_3534")];
	tensor<fp16, [1, 16, 1, 64]> var_3535 = mul(x = var_3534, y = sin)[name = string("op_3535")];
	tensor<fp16, [1, 16, 1, 64]> q_19 = add(x = var_3528, y = var_3535)[name = string("q_19")];
	tensor<fp16, [1, 8, 1, 64]> var_3538 = mul(x = k_17, y = cos)[name = string("op_3538")];
	tensor<int32, [2]> var_3539_split_sizes_0 = const()[name = string("op_3539_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
	int32 var_3539_axis_0 = const()[name = string("op_3539_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 32]> var_3539_0, tensor<fp16, [1, 8, 1, 32]> var_3539_1 = split(axis = var_3539_axis_0, split_sizes = var_3539_split_sizes_0, x = k_17)[name = string("op_3539")];
	fp16 const_44_promoted = const()[name = string("const_44_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 32]> var_3541 = mul(x = var_3539_1, y = const_44_promoted)[name = string("op_3541")];
	int32 var_3543 = const()[name = string("op_3543"), val = int32(-1)];
	bool var_3544_interleave_0 = const()[name = string("op_3544_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 64]> var_3544 = concat(axis = var_3543, interleave = var_3544_interleave_0, values = (var_3541, var_3539_0))[name = string("op_3544")];
	tensor<fp16, [1, 8, 1, 64]> var_3545 = mul(x = var_3544, y = sin)[name = string("op_3545")];
	tensor<fp16, [1, 8, 1, 64]> k_19 = add(x = var_3538, y = var_3545)[name = string("k_19")];
	tensor<int32, [4]> var_3550_begin_0 = const()[name = string("op_3550_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
	tensor<int32, [4]> var_3550_end_0 = const()[name = string("op_3550_end_0"), val = tensor<int32, [4]>([5, 8, 2048, 64])];
	tensor<bool, [4]> var_3550_end_mask_0 = const()[name = string("op_3550_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_3550_squeeze_mask_0 = const()[name = string("op_3550_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [8, 2048, 64]> var_3550_cast_fp16 = slice_by_index(begin = var_3550_begin_0, end = var_3550_end_0, end_mask = var_3550_end_mask_0, squeeze_mask = var_3550_squeeze_mask_0, x = coreml_update_state_19)[name = string("op_3550_cast_fp16")];
	tensor<int32, [1]> K_cache_9_axes_0 = const()[name = string("K_cache_9_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 8, 2048, 64]> K_cache_9_cast_fp16 = expand_dims(axes = K_cache_9_axes_0, x = var_3550_cast_fp16)[name = string("K_cache_9_cast_fp16")];
	tensor<int32, [4]> var_3555_begin_0 = const()[name = string("op_3555_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
	tensor<int32, [4]> var_3555_end_0 = const()[name = string("op_3555_end_0"), val = tensor<int32, [4]>([11, 8, 2048, 64])];
	tensor<bool, [4]> var_3555_end_mask_0 = const()[name = string("op_3555_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_3555_squeeze_mask_0 = const()[name = string("op_3555_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [8, 2048, 64]> var_3555_cast_fp16 = slice_by_index(begin = var_3555_begin_0, end = var_3555_end_0, end_mask = var_3555_end_mask_0, squeeze_mask = var_3555_squeeze_mask_0, x = coreml_update_state_19)[name = string("op_3555_cast_fp16")];
	tensor<int32, [1]> V_cache_9_axes_0 = const()[name = string("V_cache_9_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 8, 2048, 64]> V_cache_9_cast_fp16 = expand_dims(axes = V_cache_9_axes_0, x = var_3555_cast_fp16)[name = string("V_cache_9_cast_fp16")];
	tensor<int32, [4]> k_b_9_reps_0 = const()[name = string("k_b_9_reps_0"), val = tensor<int32, [4]>([1, 1, 2048, 1])];
	tensor<fp16, [1, 8, 2048, 64]> k_b_9 = tile(reps = k_b_9_reps_0, x = k_19)[name = string("k_b_9")];
	tensor<int32, [4]> v_b_9_reps_0 = const()[name = string("v_b_9_reps_0"), val = tensor<int32, [4]>([1, 1, 2048, 1])];
	tensor<fp16, [1, 8, 1, 64]> var_3494 = transpose(perm = var_3493, x = var_3488)[name = string("transpose_66")];
	tensor<fp16, [1, 8, 2048, 64]> v_b_9 = tile(reps = v_b_9_reps_0, x = var_3494)[name = string("v_b_9")];
	tensor<fp16, [1, 8, 2048, 64]> var_3563_cast_fp16 = mul(x = K_cache_9_cast_fp16, y = var_1132_cast_fp16)[name = string("op_3563_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_3564_cast_fp16 = mul(x = k_b_9, y = update_mask)[name = string("op_3564_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> K_new_9_cast_fp16 = add(x = var_3563_cast_fp16, y = var_3564_cast_fp16)[name = string("K_new_9_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_3570_cast_fp16 = mul(x = V_cache_9_cast_fp16, y = var_1132_cast_fp16)[name = string("op_3570_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_3571_cast_fp16 = mul(x = v_b_9, y = update_mask)[name = string("op_3571_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> V_new_9_cast_fp16 = add(x = var_3570_cast_fp16, y = var_3571_cast_fp16)[name = string("V_new_9_cast_fp16")];
	tensor<int32, [1]> var_3575_axes_0 = const()[name = string("op_3575_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [8, 2048, 64]> var_3575_cast_fp16 = squeeze(axes = var_3575_axes_0, x = K_new_9_cast_fp16)[name = string("op_3575_cast_fp16")];
	tensor<int32, [4]> concat_32 = const()[name = string("concat_32"), val = tensor<int32, [4]>([4, 0, 0, 0])];
	tensor<int32, [4]> concat_33 = const()[name = string("concat_33"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [12, 8, 2048, 64]> kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_32, begin_mask = kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_33, end_mask = kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_9_stride_0, update = var_3575_cast_fp16, x = coreml_update_state_19)[name = string("kv_cache_0_internal_tensor_assign_9_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_9_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_20_write_state")];
	tensor<fp16, [12, 8, 2048, 64]> coreml_update_state_20 = read_state(input = kv_cache_0)[name = string("coreml_update_state_20")];
	tensor<int32, [1]> var_3582_axes_0 = const()[name = string("op_3582_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [8, 2048, 64]> var_3582_cast_fp16 = squeeze(axes = var_3582_axes_0, x = V_new_9_cast_fp16)[name = string("op_3582_cast_fp16")];
	tensor<int32, [4]> concat_34 = const()[name = string("concat_34"), val = tensor<int32, [4]>([10, 0, 0, 0])];
	tensor<int32, [4]> concat_35 = const()[name = string("concat_35"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [12, 8, 2048, 64]> kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_34, begin_mask = kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_35, end_mask = kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_10_stride_0, update = var_3582_cast_fp16, x = coreml_update_state_20)[name = string("kv_cache_0_internal_tensor_assign_10_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_10_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_21_write_state")];
	tensor<fp16, [12, 8, 2048, 64]> coreml_update_state_21 = read_state(input = kv_cache_0)[name = string("coreml_update_state_21")];
	tensor<int32, [4]> transpose_16_perm_0 = const()[name = string("transpose_16_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_8_reps_0 = const()[name = string("tile_8_reps_0"), val = tensor<int32, [4]>([2, 1, 1, 1])];
	tensor<fp16, [8, 1, 2048, 64]> transpose_16_cast_fp16 = transpose(perm = transpose_16_perm_0, x = K_new_9_cast_fp16)[name = string("transpose_65")];
	tensor<fp16, [16, 1, 2048, 64]> tile_8_cast_fp16 = tile(reps = tile_8_reps_0, x = transpose_16_cast_fp16)[name = string("tile_8_cast_fp16")];
	tensor<int32, [5]> concat_36 = const()[name = string("concat_36"), val = tensor<int32, [5]>([2, 8, 1, 2048, 64])];
	tensor<fp16, [2, 8, 1, 2048, 64]> reshape_16_cast_fp16 = reshape(shape = concat_36, x = tile_8_cast_fp16)[name = string("reshape_16_cast_fp16")];
	tensor<int32, [5]> transpose_17_perm_0 = const()[name = string("transpose_17_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_37 = const()[name = string("concat_37"), val = tensor<int32, [4]>([-1, 1, 2048, 64])];
	tensor<fp16, [8, 2, 1, 2048, 64]> transpose_17_cast_fp16 = transpose(perm = transpose_17_perm_0, x = reshape_16_cast_fp16)[name = string("transpose_64")];
	tensor<fp16, [16, 1, 2048, 64]> reshape_17_cast_fp16 = reshape(shape = concat_37, x = transpose_17_cast_fp16)[name = string("reshape_17_cast_fp16")];
	tensor<int32, [4]> transpose_28_perm_0 = const()[name = string("transpose_28_perm_0"), val = tensor<int32, [4]>([1, 0, 3, 2])];
	tensor<int32, [4]> transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_9_reps_0 = const()[name = string("tile_9_reps_0"), val = tensor<int32, [4]>([2, 1, 1, 1])];
	tensor<fp16, [8, 1, 2048, 64]> transpose_18_cast_fp16 = transpose(perm = transpose_18_perm_0, x = V_new_9_cast_fp16)[name = string("transpose_62")];
	tensor<fp16, [16, 1, 2048, 64]> tile_9_cast_fp16 = tile(reps = tile_9_reps_0, x = transpose_18_cast_fp16)[name = string("tile_9_cast_fp16")];
	tensor<int32, [5]> concat_38 = const()[name = string("concat_38"), val = tensor<int32, [5]>([2, 8, 1, 2048, 64])];
	tensor<fp16, [2, 8, 1, 2048, 64]> reshape_18_cast_fp16 = reshape(shape = concat_38, x = tile_9_cast_fp16)[name = string("reshape_18_cast_fp16")];
	tensor<int32, [5]> transpose_19_perm_0 = const()[name = string("transpose_19_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_39 = const()[name = string("concat_39"), val = tensor<int32, [4]>([-1, 1, 2048, 64])];
	tensor<fp16, [8, 2, 1, 2048, 64]> transpose_19_cast_fp16 = transpose(perm = transpose_19_perm_0, x = reshape_18_cast_fp16)[name = string("transpose_61")];
	tensor<fp16, [16, 1, 2048, 64]> reshape_19_cast_fp16 = reshape(shape = concat_39, x = transpose_19_cast_fp16)[name = string("reshape_19_cast_fp16")];
	tensor<int32, [4]> V_e_9_perm_0 = const()[name = string("V_e_9_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	bool var_3609_transpose_x_0 = const()[name = string("op_3609_transpose_x_0"), val = bool(false)];
	bool var_3609_transpose_y_0 = const()[name = string("op_3609_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 16, 64, 2048]> transpose_28_cast_fp16 = transpose(perm = transpose_28_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_63")];
	tensor<fp16, [1, 16, 1, 2048]> var_3609_cast_fp16 = matmul(transpose_x = var_3609_transpose_x_0, transpose_y = var_3609_transpose_y_0, x = q_19, y = transpose_28_cast_fp16)[name = string("op_3609_cast_fp16")];
	fp16 var_3610_to_fp16 = const()[name = string("op_3610_to_fp16"), val = fp16(0x1p-3)];
	tensor<fp16, [1, 16, 1, 2048]> attn_25_cast_fp16 = mul(x = var_3609_cast_fp16, y = var_3610_to_fp16)[name = string("attn_25_cast_fp16")];
	tensor<fp16, [1, 16, 1, 2048]> attn_27_cast_fp16 = add(x = attn_25_cast_fp16, y = causal_mask)[name = string("attn_27_cast_fp16")];
	int32 var_3619 = const()[name = string("op_3619"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 2048]> var_3621_cast_fp16 = softmax(axis = var_3619, x = attn_27_cast_fp16)[name = string("op_3621_cast_fp16")];
	bool var_3637_transpose_x_0 = const()[name = string("op_3637_transpose_x_0"), val = bool(false)];
	bool var_3637_transpose_y_0 = const()[name = string("op_3637_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 16, 2048, 64]> V_e_9_cast_fp16 = transpose(perm = V_e_9_perm_0, x = reshape_19_cast_fp16)[name = string("transpose_60")];
	tensor<fp16, [1, 16, 1, 64]> var_3637_cast_fp16 = matmul(transpose_x = var_3637_transpose_x_0, transpose_y = var_3637_transpose_y_0, x = var_3621_cast_fp16, y = V_e_9_cast_fp16)[name = string("op_3637_cast_fp16")];
	tensor<int32, [4]> var_3647 = const()[name = string("op_3647"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_3654 = const()[name = string("op_3654"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 16, 64]> var_3648 = transpose(perm = var_3647, x = var_3637_cast_fp16)[name = string("transpose_59")];
	tensor<fp16, [1, 1, 1024]> out_19 = reshape(shape = var_3654, x = var_3648)[name = string("out_19")];
	tensor<int32, [3]> var_3659 = const()[name = string("op_3659"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1024, 1024, 1]> squeeze_4 = const()[name = string("squeeze_4"), val = tensor<fp16, [1024, 1024, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835825792)))];
	string var_3675_pad_type_0 = const()[name = string("op_3675_pad_type_0"), val = string("valid")];
	int32 var_3675_groups_0 = const()[name = string("op_3675_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_3675_strides_0 = const()[name = string("op_3675_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_3675_pad_0 = const()[name = string("op_3675_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_3675_dilations_0 = const()[name = string("op_3675_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 1024, 1]> var_3660 = transpose(perm = var_3659, x = out_19)[name = string("transpose_58")];
	tensor<fp16, [1, 1024, 1]> var_3675 = conv(dilations = var_3675_dilations_0, groups = var_3675_groups_0, pad = var_3675_pad_0, pad_type = var_3675_pad_type_0, strides = var_3675_strides_0, weight = squeeze_4, x = var_3660)[name = string("op_3675")];
	tensor<int32, [3]> var_3679 = const()[name = string("op_3679"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> op_out_25 = transpose(perm = var_3679, x = var_3675)[name = string("transpose_57")];
	tensor<fp16, [1, 1, 1024]> x_71_cast_fp16 = add(x = x_65_cast_fp16, y = op_out_25)[name = string("x_71_cast_fp16")];
	fp16 const_45_promoted_to_fp16 = const()[name = string("const_45_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_3683_cast_fp16 = mul(x = x_71_cast_fp16, y = const_45_promoted_to_fp16)[name = string("op_3683_cast_fp16")];
	int32 var_3685 = const()[name = string("op_3685"), val = int32(-1)];
	bool input_195_interleave_0 = const()[name = string("input_195_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_195_cast_fp16 = concat(axis = var_3685, interleave = input_195_interleave_0, values = (x_71_cast_fp16, var_3683_cast_fp16))[name = string("input_195_cast_fp16")];
	tensor<int32, [1]> normed_95_axes_0 = const()[name = string("normed_95_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3691_to_fp16 = const()[name = string("op_3691_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_95_cast_fp16 = layer_norm(axes = normed_95_axes_0, epsilon = var_3691_to_fp16, x = input_195_cast_fp16)[name = string("normed_95_cast_fp16")];
	tensor<int32, [2]> var_3694_split_sizes_0 = const()[name = string("op_3694_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_3694_axis_0 = const()[name = string("op_3694_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_3694_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_3694_cast_fp16_1 = split(axis = var_3694_axis_0, split_sizes = var_3694_split_sizes_0, x = normed_95_cast_fp16)[name = string("op_3694_cast_fp16")];
	tensor<fp16, [1024]> layers_12_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_12_ffn_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(837923008)))];
	tensor<fp16, [1, 1, 1024]> normed_97_cast_fp16 = mul(x = var_3694_cast_fp16_0, y = layers_12_ffn_norm_weight_promoted_to_fp16)[name = string("normed_97_cast_fp16")];
	tensor<int32, [3]> var_3700 = const()[name = string("op_3700"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_3703_axes_0 = const()[name = string("op_3703_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_3701_cast_fp16 = transpose(perm = var_3700, x = normed_97_cast_fp16)[name = string("transpose_56")];
	tensor<fp16, [1, 1024, 1, 1]> var_3703_cast_fp16 = expand_dims(axes = var_3703_axes_0, x = var_3701_cast_fp16)[name = string("op_3703_cast_fp16")];
	string input_199_pad_type_0 = const()[name = string("input_199_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> input_199_strides_0 = const()[name = string("input_199_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> input_199_pad_0 = const()[name = string("input_199_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> input_199_dilations_0 = const()[name = string("input_199_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 input_199_groups_0 = const()[name = string("input_199_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> input_199 = conv(dilations = input_199_dilations_0, groups = input_199_groups_0, pad = input_199_pad_0, pad_type = input_199_pad_type_0, strides = input_199_strides_0, weight = layers_12_feed_forward_w1_weight, x = var_3703_cast_fp16)[name = string("input_199")];
	string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> b_25 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = layers_12_feed_forward_w3_weight, x = var_3703_cast_fp16)[name = string("b_25")];
	tensor<fp16, [1, 4608, 1, 1]> var_3731 = silu(x = input_199)[name = string("op_3731")];
	tensor<fp16, [1, 4608, 1, 1]> input_201 = mul(x = var_3731, y = b_25)[name = string("input_201")];
	string mlp_49_pad_type_0 = const()[name = string("mlp_49_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_49_strides_0 = const()[name = string("mlp_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_49_pad_0 = const()[name = string("mlp_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_49_dilations_0 = const()[name = string("mlp_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_49_groups_0 = const()[name = string("mlp_49_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> mlp_49 = conv(dilations = mlp_49_dilations_0, groups = mlp_49_groups_0, pad = mlp_49_pad_0, pad_type = mlp_49_pad_type_0, strides = mlp_49_strides_0, weight = layers_12_feed_forward_w2_weight, x = input_201)[name = string("mlp_49")];
	tensor<int32, [1]> var_3745_axes_0 = const()[name = string("op_3745_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_3745 = squeeze(axes = var_3745_axes_0, x = mlp_49)[name = string("op_3745")];
	tensor<int32, [3]> var_3749 = const()[name = string("op_3749"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> mlp_51 = transpose(perm = var_3749, x = var_3745)[name = string("transpose_55")];
	tensor<fp16, [1, 1, 1024]> x_73_cast_fp16 = add(x = x_71_cast_fp16, y = mlp_51)[name = string("x_73_cast_fp16")];
	fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_3753_cast_fp16 = mul(x = x_73_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_3753_cast_fp16")];
	int32 var_3755 = const()[name = string("op_3755"), val = int32(-1)];
	bool input_203_interleave_0 = const()[name = string("input_203_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_203_cast_fp16 = concat(axis = var_3755, interleave = input_203_interleave_0, values = (x_73_cast_fp16, var_3753_cast_fp16))[name = string("input_203_cast_fp16")];
	tensor<int32, [1]> normed_99_axes_0 = const()[name = string("normed_99_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3761_to_fp16 = const()[name = string("op_3761_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_99_cast_fp16 = layer_norm(axes = normed_99_axes_0, epsilon = var_3761_to_fp16, x = input_203_cast_fp16)[name = string("normed_99_cast_fp16")];
	tensor<int32, [2]> var_3764_split_sizes_0 = const()[name = string("op_3764_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_3764_axis_0 = const()[name = string("op_3764_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_3764_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_3764_cast_fp16_1 = split(axis = var_3764_axis_0, split_sizes = var_3764_split_sizes_0, x = normed_99_cast_fp16)[name = string("op_3764_cast_fp16")];
	tensor<fp16, [1024]> layers_13_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_13_operator_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(837925120)))];
	tensor<fp16, [1, 1, 1024]> hidden_states_27_cast_fp16 = mul(x = var_3764_cast_fp16_0, y = layers_13_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_27_cast_fp16")];
	tensor<int32, [3]> var_3770 = const()[name = string("op_3770"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_3773_axes_0 = const()[name = string("op_3773_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_3771_cast_fp16 = transpose(perm = var_3770, x = hidden_states_27_cast_fp16)[name = string("transpose_54")];
	tensor<fp16, [1, 1024, 1, 1]> var_3773_cast_fp16 = expand_dims(axes = var_3773_axes_0, x = var_3771_cast_fp16)[name = string("op_3773_cast_fp16")];
	string BCx_17_pad_type_0 = const()[name = string("BCx_17_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> BCx_17_strides_0 = const()[name = string("BCx_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> BCx_17_pad_0 = const()[name = string("BCx_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> BCx_17_dilations_0 = const()[name = string("BCx_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 BCx_17_groups_0 = const()[name = string("BCx_17_groups_0"), val = int32(1)];
	tensor<fp16, [1, 3072, 1, 1]> BCx_17 = conv(dilations = BCx_17_dilations_0, groups = BCx_17_groups_0, pad = BCx_17_pad_0, pad_type = BCx_17_pad_type_0, strides = BCx_17_strides_0, weight = layers_13_conv_in_proj_weight, x = var_3773_cast_fp16)[name = string("BCx_17")];
	tensor<int32, [3]> var_3790_split_sizes_0 = const()[name = string("op_3790_split_sizes_0"), val = tensor<int32, [3]>([1024, 1024, 1024])];
	int32 var_3790_axis_0 = const()[name = string("op_3790_axis_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> var_3790_0, tensor<fp16, [1, 1024, 1, 1]> var_3790_1, tensor<fp16, [1, 1024, 1, 1]> var_3790_2 = split(axis = var_3790_axis_0, split_sizes = var_3790_split_sizes_0, x = BCx_17)[name = string("op_3790")];
	tensor<fp16, [1, 1024, 1, 1]> Bx_17 = mul(x = var_3790_0, y = var_3790_2)[name = string("Bx_17")];
	tensor<int32, [3]> var_3796_begin_0 = const()[name = string("op_3796_begin_0"), val = tensor<int32, [3]>([8, 0, 0])];
	tensor<int32, [3]> var_3796_end_0 = const()[name = string("op_3796_end_0"), val = tensor<int32, [3]>([9, 1024, 3])];
	tensor<bool, [3]> var_3796_end_mask_0 = const()[name = string("op_3796_end_mask_0"), val = tensor<bool, [3]>([false, true, true])];
	tensor<bool, [3]> var_3796_squeeze_mask_0 = const()[name = string("op_3796_squeeze_mask_0"), val = tensor<bool, [3]>([true, false, false])];
	tensor<fp16, [1024, 3]> var_3796_cast_fp16 = slice_by_index(begin = var_3796_begin_0, end = var_3796_end_0, end_mask = var_3796_end_mask_0, squeeze_mask = var_3796_squeeze_mask_0, x = conv_state_in)[name = string("op_3796_cast_fp16")];
	tensor<int32, [1]> var_3798_axes_0 = const()[name = string("op_3798_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1024, 3]> var_3798_cast_fp16 = expand_dims(axes = var_3798_axes_0, x = var_3796_cast_fp16)[name = string("op_3798_cast_fp16")];
	tensor<int32, [1]> slot_17_axes_0 = const()[name = string("slot_17_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1, 3]> slot_17_cast_fp16 = expand_dims(axes = slot_17_axes_0, x = var_3798_cast_fp16)[name = string("slot_17_cast_fp16")];
	tensor<int32, [4]> live_tail_17_begin_0 = const()[name = string("live_tail_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1])];
	tensor<int32, [4]> live_tail_17_end_0 = const()[name = string("live_tail_17_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
	tensor<bool, [4]> live_tail_17_end_mask_0 = const()[name = string("live_tail_17_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
	tensor<fp16, [1, 1024, 1, 2]> live_tail_17_cast_fp16 = slice_by_index(begin = live_tail_17_begin_0, end = live_tail_17_end_0, end_mask = live_tail_17_end_mask_0, x = slot_17_cast_fp16)[name = string("live_tail_17_cast_fp16")];
	int32 var_3807 = const()[name = string("op_3807"), val = int32(-1)];
	bool new_state_17_interleave_0 = const()[name = string("new_state_17_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1024, 1, 3]> new_state_17_cast_fp16 = concat(axis = var_3807, interleave = new_state_17_interleave_0, values = (live_tail_17_cast_fp16, Bx_17))[name = string("new_state_17_cast_fp16")];
	tensor<int32, [1]> var_3810_axes_0 = const()[name = string("op_3810_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1024, 1, 3]> var_3810_cast_fp16 = squeeze(axes = var_3810_axes_0, x = new_state_17_cast_fp16)[name = string("op_3810_cast_fp16")];
	tensor<int32, [1]> var_3812_axes_0 = const()[name = string("op_3812_axes_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1024, 3]> var_3812_cast_fp16 = squeeze(axes = var_3812_axes_0, x = var_3810_cast_fp16)[name = string("op_3812_cast_fp16")];
	string conv_out_17_pad_type_0 = const()[name = string("conv_out_17_pad_type_0"), val = string("valid")];
	int32 conv_out_17_groups_0 = const()[name = string("conv_out_17_groups_0"), val = int32(1024)];
	tensor<int32, [2]> conv_out_17_strides_0 = const()[name = string("conv_out_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> conv_out_17_pad_0 = const()[name = string("conv_out_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> conv_out_17_dilations_0 = const()[name = string("conv_out_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<fp16, [1024, 1, 1, 3]> layers_13_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_13_conv_conv_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(837927232)))];
	tensor<fp16, [1, 1024, 1, 1]> conv_out_17_cast_fp16 = conv(dilations = conv_out_17_dilations_0, groups = conv_out_17_groups_0, pad = conv_out_17_pad_0, pad_type = conv_out_17_pad_type_0, strides = conv_out_17_strides_0, weight = layers_13_conv_conv_weight_promoted_to_fp16, x = new_state_17_cast_fp16)[name = string("conv_out_17_cast_fp16")];
	tensor<fp16, [1, 1024, 1, 1]> input_207_cast_fp16 = mul(x = var_3790_1, y = conv_out_17_cast_fp16)[name = string("input_207_cast_fp16")];
	string y_17_pad_type_0 = const()[name = string("y_17_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> y_17_strides_0 = const()[name = string("y_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> y_17_pad_0 = const()[name = string("y_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> y_17_dilations_0 = const()[name = string("y_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 y_17_groups_0 = const()[name = string("y_17_groups_0"), val = int32(1)];
	tensor<fp16, [1024, 1024, 1, 1]> layers_13_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_13_conv_out_proj_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(837933440)))];
	tensor<fp16, [1, 1024, 1, 1]> y_17_cast_fp16 = conv(dilations = y_17_dilations_0, groups = y_17_groups_0, pad = y_17_pad_0, pad_type = y_17_pad_type_0, strides = y_17_strides_0, weight = layers_13_conv_out_proj_weight_promoted_to_fp16, x = input_207_cast_fp16)[name = string("y_17_cast_fp16")];
	tensor<int32, [1]> var_3838_axes_0 = const()[name = string("op_3838_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_3838_cast_fp16 = squeeze(axes = var_3838_axes_0, x = y_17_cast_fp16)[name = string("op_3838_cast_fp16")];
	tensor<int32, [3]> var_3842 = const()[name = string("op_3842"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> op_out_27_cast_fp16 = transpose(perm = var_3842, x = var_3838_cast_fp16)[name = string("transpose_53")];
	tensor<fp16, [1, 1, 1024]> x_75_cast_fp16 = add(x = x_73_cast_fp16, y = op_out_27_cast_fp16)[name = string("x_75_cast_fp16")];
	fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_3846_cast_fp16 = mul(x = x_75_cast_fp16, y = const_47_promoted_to_fp16)[name = string("op_3846_cast_fp16")];
	int32 var_3848 = const()[name = string("op_3848"), val = int32(-1)];
	bool input_209_interleave_0 = const()[name = string("input_209_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_209_cast_fp16 = concat(axis = var_3848, interleave = input_209_interleave_0, values = (x_75_cast_fp16, var_3846_cast_fp16))[name = string("input_209_cast_fp16")];
	tensor<int32, [1]> normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3854_to_fp16 = const()[name = string("op_3854_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_3854_to_fp16, x = input_209_cast_fp16)[name = string("normed_101_cast_fp16")];
	tensor<int32, [2]> var_3857_split_sizes_0 = const()[name = string("op_3857_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_3857_axis_0 = const()[name = string("op_3857_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_3857_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_3857_cast_fp16_1 = split(axis = var_3857_axis_0, split_sizes = var_3857_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_3857_cast_fp16")];
	tensor<fp16, [1024]> layers_13_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_13_ffn_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840030656)))];
	tensor<fp16, [1, 1, 1024]> normed_103_cast_fp16 = mul(x = var_3857_cast_fp16_0, y = layers_13_ffn_norm_weight_promoted_to_fp16)[name = string("normed_103_cast_fp16")];
	tensor<int32, [3]> var_3863 = const()[name = string("op_3863"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_3866_axes_0 = const()[name = string("op_3866_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_3864_cast_fp16 = transpose(perm = var_3863, x = normed_103_cast_fp16)[name = string("transpose_52")];
	tensor<fp16, [1, 1024, 1, 1]> var_3866_cast_fp16 = expand_dims(axes = var_3866_axes_0, x = var_3864_cast_fp16)[name = string("op_3866_cast_fp16")];
	string input_213_pad_type_0 = const()[name = string("input_213_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> input_213_strides_0 = const()[name = string("input_213_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> input_213_pad_0 = const()[name = string("input_213_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> input_213_dilations_0 = const()[name = string("input_213_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 input_213_groups_0 = const()[name = string("input_213_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> input_213 = conv(dilations = input_213_dilations_0, groups = input_213_groups_0, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = input_213_strides_0, weight = layers_13_feed_forward_w1_weight, x = var_3866_cast_fp16)[name = string("input_213")];
	string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> b_27 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = layers_13_feed_forward_w3_weight, x = var_3866_cast_fp16)[name = string("b_27")];
	tensor<fp16, [1, 4608, 1, 1]> var_3894 = silu(x = input_213)[name = string("op_3894")];
	tensor<fp16, [1, 4608, 1, 1]> input_215 = mul(x = var_3894, y = b_27)[name = string("input_215")];
	string mlp_53_pad_type_0 = const()[name = string("mlp_53_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_53_strides_0 = const()[name = string("mlp_53_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_53_pad_0 = const()[name = string("mlp_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_53_dilations_0 = const()[name = string("mlp_53_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_53_groups_0 = const()[name = string("mlp_53_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> mlp_53 = conv(dilations = mlp_53_dilations_0, groups = mlp_53_groups_0, pad = mlp_53_pad_0, pad_type = mlp_53_pad_type_0, strides = mlp_53_strides_0, weight = layers_13_feed_forward_w2_weight, x = input_215)[name = string("mlp_53")];
	tensor<int32, [1]> var_3908_axes_0 = const()[name = string("op_3908_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_3908 = squeeze(axes = var_3908_axes_0, x = mlp_53)[name = string("op_3908")];
	tensor<int32, [3]> var_3912 = const()[name = string("op_3912"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> mlp_55 = transpose(perm = var_3912, x = var_3908)[name = string("transpose_51")];
	tensor<fp16, [1, 1, 1024]> x_77_cast_fp16 = add(x = x_75_cast_fp16, y = mlp_55)[name = string("x_77_cast_fp16")];
	fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_3916_cast_fp16 = mul(x = x_77_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_3916_cast_fp16")];
	int32 var_3918 = const()[name = string("op_3918"), val = int32(-1)];
	bool input_217_interleave_0 = const()[name = string("input_217_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_217_cast_fp16 = concat(axis = var_3918, interleave = input_217_interleave_0, values = (x_77_cast_fp16, var_3916_cast_fp16))[name = string("input_217_cast_fp16")];
	tensor<int32, [1]> normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3924_to_fp16 = const()[name = string("op_3924_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_3924_to_fp16, x = input_217_cast_fp16)[name = string("normed_105_cast_fp16")];
	tensor<int32, [2]> var_3927_split_sizes_0 = const()[name = string("op_3927_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_3927_axis_0 = const()[name = string("op_3927_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_3927_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_3927_cast_fp16_1 = split(axis = var_3927_axis_0, split_sizes = var_3927_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_3927_cast_fp16")];
	tensor<fp16, [1024]> layers_14_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_14_operator_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840032768)))];
	tensor<fp16, [1, 1, 1024]> hidden_states_29_cast_fp16 = mul(x = var_3927_cast_fp16_0, y = layers_14_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_29_cast_fp16")];
	tensor<int32, [3]> var_3933 = const()[name = string("op_3933"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_3936_axes_0 = const()[name = string("op_3936_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_3934_cast_fp16 = transpose(perm = var_3933, x = hidden_states_29_cast_fp16)[name = string("transpose_50")];
	tensor<fp16, [1, 1024, 1, 1]> var_3936_cast_fp16 = expand_dims(axes = var_3936_axes_0, x = var_3934_cast_fp16)[name = string("op_3936_cast_fp16")];
	string var_3952_pad_type_0 = const()[name = string("op_3952_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3952_strides_0 = const()[name = string("op_3952_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3952_pad_0 = const()[name = string("op_3952_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3952_dilations_0 = const()[name = string("op_3952_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3952_groups_0 = const()[name = string("op_3952_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> var_3952 = conv(dilations = var_3952_dilations_0, groups = var_3952_groups_0, pad = var_3952_pad_0, pad_type = var_3952_pad_type_0, strides = var_3952_strides_0, weight = layers_14_self_attn_q_proj_weight, x = var_3936_cast_fp16)[name = string("op_3952")];
	tensor<int32, [4]> var_3957 = const()[name = string("op_3957"), val = tensor<int32, [4]>([1, 16, 64, 1])];
	tensor<fp16, [1, 16, 64, 1]> var_3958 = reshape(shape = var_3957, x = var_3952)[name = string("op_3958")];
	tensor<int32, [4]> var_3963 = const()[name = string("op_3963"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	string var_3980_pad_type_0 = const()[name = string("op_3980_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3980_strides_0 = const()[name = string("op_3980_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3980_pad_0 = const()[name = string("op_3980_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3980_dilations_0 = const()[name = string("op_3980_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3980_groups_0 = const()[name = string("op_3980_groups_0"), val = int32(1)];
	tensor<fp16, [1, 512, 1, 1]> var_3980 = conv(dilations = var_3980_dilations_0, groups = var_3980_groups_0, pad = var_3980_pad_0, pad_type = var_3980_pad_type_0, strides = var_3980_strides_0, weight = layers_14_self_attn_k_proj_weight, x = var_3936_cast_fp16)[name = string("op_3980")];
	tensor<int32, [4]> var_3985 = const()[name = string("op_3985"), val = tensor<int32, [4]>([1, 8, 64, 1])];
	tensor<fp16, [1, 8, 64, 1]> var_3986 = reshape(shape = var_3985, x = var_3980)[name = string("op_3986")];
	tensor<int32, [4]> var_3991 = const()[name = string("op_3991"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	string var_4008_pad_type_0 = const()[name = string("op_4008_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_4008_strides_0 = const()[name = string("op_4008_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_4008_pad_0 = const()[name = string("op_4008_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_4008_dilations_0 = const()[name = string("op_4008_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_4008_groups_0 = const()[name = string("op_4008_groups_0"), val = int32(1)];
	tensor<fp16, [1, 512, 1, 1]> var_4008 = conv(dilations = var_4008_dilations_0, groups = var_4008_groups_0, pad = var_4008_pad_0, pad_type = var_4008_pad_type_0, strides = var_4008_strides_0, weight = layers_14_self_attn_v_proj_weight, x = var_3936_cast_fp16)[name = string("op_4008")];
	tensor<int32, [4]> var_4013 = const()[name = string("op_4013"), val = tensor<int32, [4]>([1, 8, 64, 1])];
	tensor<fp16, [1, 8, 64, 1]> var_4014 = reshape(shape = var_4013, x = var_4008)[name = string("op_4014")];
	tensor<int32, [4]> var_4019 = const()[name = string("op_4019"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 16, 1, 64]> var_3964 = transpose(perm = var_3963, x = var_3958)[name = string("transpose_49")];
	tensor<fp16, [1, 16, 1, 64]> var_4026 = mul(x = var_3964, y = const_49_promoted)[name = string("op_4026")];
	int32 var_4028 = const()[name = string("op_4028"), val = int32(-1)];
	bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 16, 1, 128]> input_221 = concat(axis = var_4028, interleave = input_221_interleave_0, values = (var_3964, var_4026))[name = string("input_221")];
	tensor<int32, [1]> normed_107_axes_0 = const()[name = string("normed_107_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4034_to_fp16 = const()[name = string("op_4034_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 16, 1, 128]> normed_107_cast_fp16 = layer_norm(axes = normed_107_axes_0, epsilon = var_4034_to_fp16, x = input_221)[name = string("normed_107_cast_fp16")];
	tensor<int32, [2]> var_4037_split_sizes_0 = const()[name = string("op_4037_split_sizes_0"), val = tensor<int32, [2]>([64, 64])];
	int32 var_4037_axis_0 = const()[name = string("op_4037_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 64]> var_4037_0, tensor<fp16, [1, 16, 1, 64]> var_4037_1 = split(axis = var_4037_axis_0, split_sizes = var_4037_split_sizes_0, x = normed_107_cast_fp16)[name = string("op_4037")];
	tensor<fp16, [1, 16, 1, 64]> q_21 = mul(x = var_4037_0, y = layers_14_self_attn_q_layernorm_weight)[name = string("q_21")];
	fp16 const_50_promoted = const()[name = string("const_50_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 64]> var_3992 = transpose(perm = var_3991, x = var_3986)[name = string("transpose_48")];
	tensor<fp16, [1, 8, 1, 64]> var_4040 = mul(x = var_3992, y = const_50_promoted)[name = string("op_4040")];
	int32 var_4042 = const()[name = string("op_4042"), val = int32(-1)];
	bool input_223_interleave_0 = const()[name = string("input_223_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 128]> input_223 = concat(axis = var_4042, interleave = input_223_interleave_0, values = (var_3992, var_4040))[name = string("input_223")];
	tensor<int32, [1]> normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4048_to_fp16 = const()[name = string("op_4048_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 8, 1, 128]> normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_4048_to_fp16, x = input_223)[name = string("normed_109_cast_fp16")];
	tensor<int32, [2]> var_4051_split_sizes_0 = const()[name = string("op_4051_split_sizes_0"), val = tensor<int32, [2]>([64, 64])];
	int32 var_4051_axis_0 = const()[name = string("op_4051_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 64]> var_4051_0, tensor<fp16, [1, 8, 1, 64]> var_4051_1 = split(axis = var_4051_axis_0, split_sizes = var_4051_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_4051")];
	tensor<fp16, [1, 8, 1, 64]> k_21 = mul(x = var_4051_0, y = layers_14_self_attn_k_layernorm_weight)[name = string("k_21")];
	tensor<fp16, [1, 16, 1, 64]> var_4054 = mul(x = q_21, y = cos)[name = string("op_4054")];
	tensor<int32, [2]> var_4055_split_sizes_0 = const()[name = string("op_4055_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
	int32 var_4055_axis_0 = const()[name = string("op_4055_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 32]> var_4055_0, tensor<fp16, [1, 16, 1, 32]> var_4055_1 = split(axis = var_4055_axis_0, split_sizes = var_4055_split_sizes_0, x = q_21)[name = string("op_4055")];
	fp16 const_51_promoted = const()[name = string("const_51_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 16, 1, 32]> var_4057 = mul(x = var_4055_1, y = const_51_promoted)[name = string("op_4057")];
	int32 var_4059 = const()[name = string("op_4059"), val = int32(-1)];
	bool var_4060_interleave_0 = const()[name = string("op_4060_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 16, 1, 64]> var_4060 = concat(axis = var_4059, interleave = var_4060_interleave_0, values = (var_4057, var_4055_0))[name = string("op_4060")];
	tensor<fp16, [1, 16, 1, 64]> var_4061 = mul(x = var_4060, y = sin)[name = string("op_4061")];
	tensor<fp16, [1, 16, 1, 64]> q = add(x = var_4054, y = var_4061)[name = string("q")];
	tensor<fp16, [1, 8, 1, 64]> var_4064 = mul(x = k_21, y = cos)[name = string("op_4064")];
	tensor<int32, [2]> var_4065_split_sizes_0 = const()[name = string("op_4065_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
	int32 var_4065_axis_0 = const()[name = string("op_4065_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 32]> var_4065_0, tensor<fp16, [1, 8, 1, 32]> var_4065_1 = split(axis = var_4065_axis_0, split_sizes = var_4065_split_sizes_0, x = k_21)[name = string("op_4065")];
	fp16 const_52_promoted = const()[name = string("const_52_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 32]> var_4067 = mul(x = var_4065_1, y = const_52_promoted)[name = string("op_4067")];
	int32 var_4069 = const()[name = string("op_4069"), val = int32(-1)];
	bool var_4070_interleave_0 = const()[name = string("op_4070_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 64]> var_4070 = concat(axis = var_4069, interleave = var_4070_interleave_0, values = (var_4067, var_4065_0))[name = string("op_4070")];
	tensor<fp16, [1, 8, 1, 64]> var_4071 = mul(x = var_4070, y = sin)[name = string("op_4071")];
	tensor<fp16, [1, 8, 1, 64]> k = add(x = var_4064, y = var_4071)[name = string("k")];
	tensor<int32, [4]> var_4076_begin_0 = const()[name = string("op_4076_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
	tensor<int32, [4]> var_4076_end_0 = const()[name = string("op_4076_end_0"), val = tensor<int32, [4]>([6, 8, 2048, 64])];
	tensor<bool, [4]> var_4076_end_mask_0 = const()[name = string("op_4076_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_4076_squeeze_mask_0 = const()[name = string("op_4076_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [8, 2048, 64]> var_4076_cast_fp16 = slice_by_index(begin = var_4076_begin_0, end = var_4076_end_0, end_mask = var_4076_end_mask_0, squeeze_mask = var_4076_squeeze_mask_0, x = coreml_update_state_21)[name = string("op_4076_cast_fp16")];
	tensor<int32, [1]> K_cache_axes_0 = const()[name = string("K_cache_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 8, 2048, 64]> K_cache_cast_fp16 = expand_dims(axes = K_cache_axes_0, x = var_4076_cast_fp16)[name = string("K_cache_cast_fp16")];
	tensor<int32, [4]> var_4081_begin_0 = const()[name = string("op_4081_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
	tensor<int32, [4]> var_4081_end_0 = const()[name = string("op_4081_end_0"), val = tensor<int32, [4]>([12, 8, 2048, 64])];
	tensor<bool, [4]> var_4081_end_mask_0 = const()[name = string("op_4081_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_4081_squeeze_mask_0 = const()[name = string("op_4081_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [8, 2048, 64]> var_4081_cast_fp16 = slice_by_index(begin = var_4081_begin_0, end = var_4081_end_0, end_mask = var_4081_end_mask_0, squeeze_mask = var_4081_squeeze_mask_0, x = coreml_update_state_21)[name = string("op_4081_cast_fp16")];
	tensor<int32, [1]> V_cache_axes_0 = const()[name = string("V_cache_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 8, 2048, 64]> V_cache_cast_fp16 = expand_dims(axes = V_cache_axes_0, x = var_4081_cast_fp16)[name = string("V_cache_cast_fp16")];
	tensor<int32, [4]> k_b_reps_0 = const()[name = string("k_b_reps_0"), val = tensor<int32, [4]>([1, 1, 2048, 1])];
	tensor<fp16, [1, 8, 2048, 64]> k_b = tile(reps = k_b_reps_0, x = k)[name = string("k_b")];
	tensor<int32, [4]> v_b_reps_0 = const()[name = string("v_b_reps_0"), val = tensor<int32, [4]>([1, 1, 2048, 1])];
	tensor<fp16, [1, 8, 1, 64]> var_4020 = transpose(perm = var_4019, x = var_4014)[name = string("transpose_47")];
	tensor<fp16, [1, 8, 2048, 64]> v_b = tile(reps = v_b_reps_0, x = var_4020)[name = string("v_b")];
	tensor<fp16, [1, 8, 2048, 64]> var_4089_cast_fp16 = mul(x = K_cache_cast_fp16, y = var_1132_cast_fp16)[name = string("op_4089_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_4090_cast_fp16 = mul(x = k_b, y = update_mask)[name = string("op_4090_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> K_new_cast_fp16 = add(x = var_4089_cast_fp16, y = var_4090_cast_fp16)[name = string("K_new_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_4096_cast_fp16 = mul(x = V_cache_cast_fp16, y = var_1132_cast_fp16)[name = string("op_4096_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> var_4097_cast_fp16 = mul(x = v_b, y = update_mask)[name = string("op_4097_cast_fp16")];
	tensor<fp16, [1, 8, 2048, 64]> V_new_cast_fp16 = add(x = var_4096_cast_fp16, y = var_4097_cast_fp16)[name = string("V_new_cast_fp16")];
	tensor<int32, [1]> var_4101_axes_0 = const()[name = string("op_4101_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [8, 2048, 64]> var_4101_cast_fp16 = squeeze(axes = var_4101_axes_0, x = K_new_cast_fp16)[name = string("op_4101_cast_fp16")];
	tensor<int32, [4]> concat_40 = const()[name = string("concat_40"), val = tensor<int32, [4]>([5, 0, 0, 0])];
	tensor<int32, [4]> concat_41 = const()[name = string("concat_41"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [12, 8, 2048, 64]> kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_40, begin_mask = kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_41, end_mask = kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_11_stride_0, update = var_4101_cast_fp16, x = coreml_update_state_21)[name = string("kv_cache_0_internal_tensor_assign_11_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_11_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_22_write_state")];
	tensor<fp16, [12, 8, 2048, 64]> coreml_update_state_22 = read_state(input = kv_cache_0)[name = string("coreml_update_state_22")];
	tensor<int32, [1]> var_4108_axes_0 = const()[name = string("op_4108_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [8, 2048, 64]> var_4108_cast_fp16 = squeeze(axes = var_4108_axes_0, x = V_new_cast_fp16)[name = string("op_4108_cast_fp16")];
	tensor<int32, [4]> concat_42 = const()[name = string("concat_42"), val = tensor<int32, [4]>([11, 0, 0, 0])];
	tensor<int32, [4]> concat_43 = const()[name = string("concat_43"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [12, 8, 2048, 64]> kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_42, begin_mask = kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_43, end_mask = kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_12_stride_0, update = var_4108_cast_fp16, x = coreml_update_state_22)[name = string("kv_cache_0_internal_tensor_assign_12_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_12_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_23_write_state")];
	tensor<int32, [4]> transpose_20_perm_0 = const()[name = string("transpose_20_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_10_reps_0 = const()[name = string("tile_10_reps_0"), val = tensor<int32, [4]>([2, 1, 1, 1])];
	tensor<fp16, [8, 1, 2048, 64]> transpose_20_cast_fp16 = transpose(perm = transpose_20_perm_0, x = K_new_cast_fp16)[name = string("transpose_46")];
	tensor<fp16, [16, 1, 2048, 64]> tile_10_cast_fp16 = tile(reps = tile_10_reps_0, x = transpose_20_cast_fp16)[name = string("tile_10_cast_fp16")];
	tensor<int32, [5]> concat_44 = const()[name = string("concat_44"), val = tensor<int32, [5]>([2, 8, 1, 2048, 64])];
	tensor<fp16, [2, 8, 1, 2048, 64]> reshape_20_cast_fp16 = reshape(shape = concat_44, x = tile_10_cast_fp16)[name = string("reshape_20_cast_fp16")];
	tensor<int32, [5]> transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_45 = const()[name = string("concat_45"), val = tensor<int32, [4]>([-1, 1, 2048, 64])];
	tensor<fp16, [8, 2, 1, 2048, 64]> transpose_21_cast_fp16 = transpose(perm = transpose_21_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_45")];
	tensor<fp16, [16, 1, 2048, 64]> reshape_21_cast_fp16 = reshape(shape = concat_45, x = transpose_21_cast_fp16)[name = string("reshape_21_cast_fp16")];
	tensor<int32, [4]> transpose_29_perm_0 = const()[name = string("transpose_29_perm_0"), val = tensor<int32, [4]>([1, 0, 3, 2])];
	tensor<int32, [4]> transpose_22_perm_0 = const()[name = string("transpose_22_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_11_reps_0 = const()[name = string("tile_11_reps_0"), val = tensor<int32, [4]>([2, 1, 1, 1])];
	tensor<fp16, [8, 1, 2048, 64]> transpose_22_cast_fp16 = transpose(perm = transpose_22_perm_0, x = V_new_cast_fp16)[name = string("transpose_43")];
	tensor<fp16, [16, 1, 2048, 64]> tile_11_cast_fp16 = tile(reps = tile_11_reps_0, x = transpose_22_cast_fp16)[name = string("tile_11_cast_fp16")];
	tensor<int32, [5]> concat_46 = const()[name = string("concat_46"), val = tensor<int32, [5]>([2, 8, 1, 2048, 64])];
	tensor<fp16, [2, 8, 1, 2048, 64]> reshape_22_cast_fp16 = reshape(shape = concat_46, x = tile_11_cast_fp16)[name = string("reshape_22_cast_fp16")];
	tensor<int32, [5]> transpose_23_perm_0 = const()[name = string("transpose_23_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_47 = const()[name = string("concat_47"), val = tensor<int32, [4]>([-1, 1, 2048, 64])];
	tensor<fp16, [8, 2, 1, 2048, 64]> transpose_23_cast_fp16 = transpose(perm = transpose_23_perm_0, x = reshape_22_cast_fp16)[name = string("transpose_42")];
	tensor<fp16, [16, 1, 2048, 64]> reshape_23_cast_fp16 = reshape(shape = concat_47, x = transpose_23_cast_fp16)[name = string("reshape_23_cast_fp16")];
	tensor<int32, [4]> V_e_perm_0 = const()[name = string("V_e_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	bool var_4135_transpose_x_0 = const()[name = string("op_4135_transpose_x_0"), val = bool(false)];
	bool var_4135_transpose_y_0 = const()[name = string("op_4135_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 16, 64, 2048]> transpose_29_cast_fp16 = transpose(perm = transpose_29_perm_0, x = reshape_21_cast_fp16)[name = string("transpose_44")];
	tensor<fp16, [1, 16, 1, 2048]> var_4135_cast_fp16 = matmul(transpose_x = var_4135_transpose_x_0, transpose_y = var_4135_transpose_y_0, x = q, y = transpose_29_cast_fp16)[name = string("op_4135_cast_fp16")];
	fp16 var_4136_to_fp16 = const()[name = string("op_4136_to_fp16"), val = fp16(0x1p-3)];
	tensor<fp16, [1, 16, 1, 2048]> attn_31_cast_fp16 = mul(x = var_4135_cast_fp16, y = var_4136_to_fp16)[name = string("attn_31_cast_fp16")];
	tensor<fp16, [1, 16, 1, 2048]> attn_33_cast_fp16 = add(x = attn_31_cast_fp16, y = causal_mask)[name = string("attn_33_cast_fp16")];
	int32 var_4145 = const()[name = string("op_4145"), val = int32(-1)];
	tensor<fp16, [1, 16, 1, 2048]> var_4147_cast_fp16 = softmax(axis = var_4145, x = attn_33_cast_fp16)[name = string("op_4147_cast_fp16")];
	bool var_4163_transpose_x_0 = const()[name = string("op_4163_transpose_x_0"), val = bool(false)];
	bool var_4163_transpose_y_0 = const()[name = string("op_4163_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 16, 2048, 64]> V_e_cast_fp16 = transpose(perm = V_e_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_41")];
	tensor<fp16, [1, 16, 1, 64]> var_4163_cast_fp16 = matmul(transpose_x = var_4163_transpose_x_0, transpose_y = var_4163_transpose_y_0, x = var_4147_cast_fp16, y = V_e_cast_fp16)[name = string("op_4163_cast_fp16")];
	tensor<int32, [4]> var_4173 = const()[name = string("op_4173"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_4180 = const()[name = string("op_4180"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 16, 64]> var_4174 = transpose(perm = var_4173, x = var_4163_cast_fp16)[name = string("transpose_40")];
	tensor<fp16, [1, 1, 1024]> out = reshape(shape = var_4180, x = var_4174)[name = string("out")];
	tensor<int32, [3]> var_4185 = const()[name = string("op_4185"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1024, 1024, 1]> squeeze_5 = const()[name = string("squeeze_5"), val = tensor<fp16, [1024, 1024, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840034880)))];
	string var_4201_pad_type_0 = const()[name = string("op_4201_pad_type_0"), val = string("valid")];
	int32 var_4201_groups_0 = const()[name = string("op_4201_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_4201_strides_0 = const()[name = string("op_4201_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_4201_pad_0 = const()[name = string("op_4201_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_4201_dilations_0 = const()[name = string("op_4201_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 1024, 1]> var_4186 = transpose(perm = var_4185, x = out)[name = string("transpose_39")];
	tensor<fp16, [1, 1024, 1]> var_4201 = conv(dilations = var_4201_dilations_0, groups = var_4201_groups_0, pad = var_4201_pad_0, pad_type = var_4201_pad_type_0, strides = var_4201_strides_0, weight = squeeze_5, x = var_4186)[name = string("op_4201")];
	tensor<int32, [3]> var_4205 = const()[name = string("op_4205"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> op_out_29 = transpose(perm = var_4205, x = var_4201)[name = string("transpose_38")];
	tensor<fp16, [1, 1, 1024]> x_83_cast_fp16 = add(x = x_77_cast_fp16, y = op_out_29)[name = string("x_83_cast_fp16")];
	fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_4209_cast_fp16 = mul(x = x_83_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_4209_cast_fp16")];
	int32 var_4211 = const()[name = string("op_4211"), val = int32(-1)];
	bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_227_cast_fp16 = concat(axis = var_4211, interleave = input_227_interleave_0, values = (x_83_cast_fp16, var_4209_cast_fp16))[name = string("input_227_cast_fp16")];
	tensor<int32, [1]> normed_111_axes_0 = const()[name = string("normed_111_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4217_to_fp16 = const()[name = string("op_4217_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_111_cast_fp16 = layer_norm(axes = normed_111_axes_0, epsilon = var_4217_to_fp16, x = input_227_cast_fp16)[name = string("normed_111_cast_fp16")];
	tensor<int32, [2]> var_4220_split_sizes_0 = const()[name = string("op_4220_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_4220_axis_0 = const()[name = string("op_4220_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_4220_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_4220_cast_fp16_1 = split(axis = var_4220_axis_0, split_sizes = var_4220_split_sizes_0, x = normed_111_cast_fp16)[name = string("op_4220_cast_fp16")];
	tensor<fp16, [1024]> layers_14_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_14_ffn_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(842132096)))];
	tensor<fp16, [1, 1, 1024]> normed_113_cast_fp16 = mul(x = var_4220_cast_fp16_0, y = layers_14_ffn_norm_weight_promoted_to_fp16)[name = string("normed_113_cast_fp16")];
	tensor<int32, [3]> var_4226 = const()[name = string("op_4226"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_4229_axes_0 = const()[name = string("op_4229_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_4227_cast_fp16 = transpose(perm = var_4226, x = normed_113_cast_fp16)[name = string("transpose_37")];
	tensor<fp16, [1, 1024, 1, 1]> var_4229_cast_fp16 = expand_dims(axes = var_4229_axes_0, x = var_4227_cast_fp16)[name = string("op_4229_cast_fp16")];
	string input_231_pad_type_0 = const()[name = string("input_231_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> input_231_strides_0 = const()[name = string("input_231_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> input_231_pad_0 = const()[name = string("input_231_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> input_231_dilations_0 = const()[name = string("input_231_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 input_231_groups_0 = const()[name = string("input_231_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> input_231 = conv(dilations = input_231_dilations_0, groups = input_231_groups_0, pad = input_231_pad_0, pad_type = input_231_pad_type_0, strides = input_231_strides_0, weight = layers_14_feed_forward_w1_weight, x = var_4229_cast_fp16)[name = string("input_231")];
	string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> b_29 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = layers_14_feed_forward_w3_weight, x = var_4229_cast_fp16)[name = string("b_29")];
	tensor<fp16, [1, 4608, 1, 1]> var_4257 = silu(x = input_231)[name = string("op_4257")];
	tensor<fp16, [1, 4608, 1, 1]> input_233 = mul(x = var_4257, y = b_29)[name = string("input_233")];
	string mlp_57_pad_type_0 = const()[name = string("mlp_57_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_57_strides_0 = const()[name = string("mlp_57_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_57_pad_0 = const()[name = string("mlp_57_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_57_dilations_0 = const()[name = string("mlp_57_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_57_groups_0 = const()[name = string("mlp_57_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> mlp_57 = conv(dilations = mlp_57_dilations_0, groups = mlp_57_groups_0, pad = mlp_57_pad_0, pad_type = mlp_57_pad_type_0, strides = mlp_57_strides_0, weight = layers_14_feed_forward_w2_weight, x = input_233)[name = string("mlp_57")];
	tensor<int32, [1]> var_4271_axes_0 = const()[name = string("op_4271_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_4271 = squeeze(axes = var_4271_axes_0, x = mlp_57)[name = string("op_4271")];
	tensor<int32, [3]> var_4275 = const()[name = string("op_4275"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> mlp_59 = transpose(perm = var_4275, x = var_4271)[name = string("transpose_36")];
	tensor<fp16, [1, 1, 1024]> x_85_cast_fp16 = add(x = x_83_cast_fp16, y = mlp_59)[name = string("x_85_cast_fp16")];
	fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_4279_cast_fp16 = mul(x = x_85_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_4279_cast_fp16")];
	int32 var_4281 = const()[name = string("op_4281"), val = int32(-1)];
	bool input_235_interleave_0 = const()[name = string("input_235_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_235_cast_fp16 = concat(axis = var_4281, interleave = input_235_interleave_0, values = (x_85_cast_fp16, var_4279_cast_fp16))[name = string("input_235_cast_fp16")];
	tensor<int32, [1]> normed_115_axes_0 = const()[name = string("normed_115_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4287_to_fp16 = const()[name = string("op_4287_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_115_cast_fp16 = layer_norm(axes = normed_115_axes_0, epsilon = var_4287_to_fp16, x = input_235_cast_fp16)[name = string("normed_115_cast_fp16")];
	tensor<int32, [2]> var_4290_split_sizes_0 = const()[name = string("op_4290_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_4290_axis_0 = const()[name = string("op_4290_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_4290_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_4290_cast_fp16_1 = split(axis = var_4290_axis_0, split_sizes = var_4290_split_sizes_0, x = normed_115_cast_fp16)[name = string("op_4290_cast_fp16")];
	tensor<fp16, [1024]> layers_15_operator_norm_weight_promoted_to_fp16 = const()[name = string("layers_15_operator_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(842134208)))];
	tensor<fp16, [1, 1, 1024]> hidden_states_31_cast_fp16 = mul(x = var_4290_cast_fp16_0, y = layers_15_operator_norm_weight_promoted_to_fp16)[name = string("hidden_states_31_cast_fp16")];
	tensor<int32, [3]> var_4296 = const()[name = string("op_4296"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_4299_axes_0 = const()[name = string("op_4299_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_4297_cast_fp16 = transpose(perm = var_4296, x = hidden_states_31_cast_fp16)[name = string("transpose_35")];
	tensor<fp16, [1, 1024, 1, 1]> var_4299_cast_fp16 = expand_dims(axes = var_4299_axes_0, x = var_4297_cast_fp16)[name = string("op_4299_cast_fp16")];
	string BCx_pad_type_0 = const()[name = string("BCx_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> BCx_strides_0 = const()[name = string("BCx_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> BCx_pad_0 = const()[name = string("BCx_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> BCx_dilations_0 = const()[name = string("BCx_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 BCx_groups_0 = const()[name = string("BCx_groups_0"), val = int32(1)];
	tensor<fp16, [1, 3072, 1, 1]> BCx = conv(dilations = BCx_dilations_0, groups = BCx_groups_0, pad = BCx_pad_0, pad_type = BCx_pad_type_0, strides = BCx_strides_0, weight = layers_15_conv_in_proj_weight, x = var_4299_cast_fp16)[name = string("BCx")];
	tensor<int32, [3]> var_4316_split_sizes_0 = const()[name = string("op_4316_split_sizes_0"), val = tensor<int32, [3]>([1024, 1024, 1024])];
	int32 var_4316_axis_0 = const()[name = string("op_4316_axis_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> var_4316_0, tensor<fp16, [1, 1024, 1, 1]> var_4316_1, tensor<fp16, [1, 1024, 1, 1]> var_4316_2 = split(axis = var_4316_axis_0, split_sizes = var_4316_split_sizes_0, x = BCx)[name = string("op_4316")];
	tensor<fp16, [1, 1024, 1, 1]> Bx = mul(x = var_4316_0, y = var_4316_2)[name = string("Bx")];
	tensor<int32, [3]> var_4322_begin_0 = const()[name = string("op_4322_begin_0"), val = tensor<int32, [3]>([9, 0, 0])];
	tensor<int32, [3]> var_4322_end_0 = const()[name = string("op_4322_end_0"), val = tensor<int32, [3]>([10, 1024, 3])];
	tensor<bool, [3]> var_4322_end_mask_0 = const()[name = string("op_4322_end_mask_0"), val = tensor<bool, [3]>([false, true, true])];
	tensor<bool, [3]> var_4322_squeeze_mask_0 = const()[name = string("op_4322_squeeze_mask_0"), val = tensor<bool, [3]>([true, false, false])];
	tensor<fp16, [1024, 3]> var_4322_cast_fp16 = slice_by_index(begin = var_4322_begin_0, end = var_4322_end_0, end_mask = var_4322_end_mask_0, squeeze_mask = var_4322_squeeze_mask_0, x = conv_state_in)[name = string("op_4322_cast_fp16")];
	tensor<int32, [1]> var_4324_axes_0 = const()[name = string("op_4324_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1024, 3]> var_4324_cast_fp16 = expand_dims(axes = var_4324_axes_0, x = var_4322_cast_fp16)[name = string("op_4324_cast_fp16")];
	tensor<int32, [1]> slot_axes_0 = const()[name = string("slot_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1, 3]> slot_cast_fp16 = expand_dims(axes = slot_axes_0, x = var_4324_cast_fp16)[name = string("slot_cast_fp16")];
	tensor<int32, [4]> live_tail_begin_0 = const()[name = string("live_tail_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1])];
	tensor<int32, [4]> live_tail_end_0 = const()[name = string("live_tail_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 1])];
	tensor<bool, [4]> live_tail_end_mask_0 = const()[name = string("live_tail_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
	tensor<fp16, [1, 1024, 1, 2]> live_tail_cast_fp16 = slice_by_index(begin = live_tail_begin_0, end = live_tail_end_0, end_mask = live_tail_end_mask_0, x = slot_cast_fp16)[name = string("live_tail_cast_fp16")];
	int32 var_4333 = const()[name = string("op_4333"), val = int32(-1)];
	bool new_state_interleave_0 = const()[name = string("new_state_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1024, 1, 3]> new_state_cast_fp16 = concat(axis = var_4333, interleave = new_state_interleave_0, values = (live_tail_cast_fp16, Bx))[name = string("new_state_cast_fp16")];
	tensor<int32, [1]> var_4336_axes_0 = const()[name = string("op_4336_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1024, 1, 3]> var_4336_cast_fp16 = squeeze(axes = var_4336_axes_0, x = new_state_cast_fp16)[name = string("op_4336_cast_fp16")];
	tensor<int32, [1]> new_slot_axes_0 = const()[name = string("new_slot_axes_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1024, 3]> new_slot_cast_fp16 = squeeze(axes = new_slot_axes_0, x = var_4336_cast_fp16)[name = string("new_slot_cast_fp16")];
	string conv_out_pad_type_0 = const()[name = string("conv_out_pad_type_0"), val = string("valid")];
	int32 conv_out_groups_0 = const()[name = string("conv_out_groups_0"), val = int32(1024)];
	tensor<int32, [2]> conv_out_strides_0 = const()[name = string("conv_out_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> conv_out_pad_0 = const()[name = string("conv_out_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> conv_out_dilations_0 = const()[name = string("conv_out_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<fp16, [1024, 1, 1, 3]> layers_15_conv_conv_weight_promoted_to_fp16 = const()[name = string("layers_15_conv_conv_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(842136320)))];
	tensor<fp16, [1, 1024, 1, 1]> conv_out_cast_fp16 = conv(dilations = conv_out_dilations_0, groups = conv_out_groups_0, pad = conv_out_pad_0, pad_type = conv_out_pad_type_0, strides = conv_out_strides_0, weight = layers_15_conv_conv_weight_promoted_to_fp16, x = new_state_cast_fp16)[name = string("conv_out_cast_fp16")];
	tensor<fp16, [1, 1024, 1, 1]> input_239_cast_fp16 = mul(x = var_4316_1, y = conv_out_cast_fp16)[name = string("input_239_cast_fp16")];
	string y_pad_type_0 = const()[name = string("y_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> y_strides_0 = const()[name = string("y_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> y_pad_0 = const()[name = string("y_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> y_dilations_0 = const()[name = string("y_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 y_groups_0 = const()[name = string("y_groups_0"), val = int32(1)];
	tensor<fp16, [1024, 1024, 1, 1]> layers_15_conv_out_proj_weight_promoted_to_fp16 = const()[name = string("layers_15_conv_out_proj_weight_promoted_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(842142528)))];
	tensor<fp16, [1, 1024, 1, 1]> y_cast_fp16 = conv(dilations = y_dilations_0, groups = y_groups_0, pad = y_pad_0, pad_type = y_pad_type_0, strides = y_strides_0, weight = layers_15_conv_out_proj_weight_promoted_to_fp16, x = input_239_cast_fp16)[name = string("y_cast_fp16")];
	tensor<int32, [1]> var_4364_axes_0 = const()[name = string("op_4364_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_4364_cast_fp16 = squeeze(axes = var_4364_axes_0, x = y_cast_fp16)[name = string("op_4364_cast_fp16")];
	tensor<int32, [3]> var_4368 = const()[name = string("op_4368"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> op_out_cast_fp16 = transpose(perm = var_4368, x = var_4364_cast_fp16)[name = string("transpose_34")];
	tensor<fp16, [1, 1, 1024]> x_87_cast_fp16 = add(x = x_85_cast_fp16, y = op_out_cast_fp16)[name = string("x_87_cast_fp16")];
	fp16 const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_4372_cast_fp16 = mul(x = x_87_cast_fp16, y = const_55_promoted_to_fp16)[name = string("op_4372_cast_fp16")];
	int32 var_4374 = const()[name = string("op_4374"), val = int32(-1)];
	bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_241_cast_fp16 = concat(axis = var_4374, interleave = input_241_interleave_0, values = (x_87_cast_fp16, var_4372_cast_fp16))[name = string("input_241_cast_fp16")];
	tensor<int32, [1]> normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4380_to_fp16 = const()[name = string("op_4380_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_4380_to_fp16, x = input_241_cast_fp16)[name = string("normed_117_cast_fp16")];
	tensor<int32, [2]> var_4383_split_sizes_0 = const()[name = string("op_4383_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_4383_axis_0 = const()[name = string("op_4383_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_4383_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_4383_cast_fp16_1 = split(axis = var_4383_axis_0, split_sizes = var_4383_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_4383_cast_fp16")];
	tensor<fp16, [1024]> layers_15_ffn_norm_weight_promoted_to_fp16 = const()[name = string("layers_15_ffn_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(844239744)))];
	tensor<fp16, [1, 1, 1024]> normed_119_cast_fp16 = mul(x = var_4383_cast_fp16_0, y = layers_15_ffn_norm_weight_promoted_to_fp16)[name = string("normed_119_cast_fp16")];
	tensor<int32, [3]> var_4389 = const()[name = string("op_4389"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_4392_axes_0 = const()[name = string("op_4392_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_4390_cast_fp16 = transpose(perm = var_4389, x = normed_119_cast_fp16)[name = string("transpose_33")];
	tensor<fp16, [1, 1024, 1, 1]> var_4392_cast_fp16 = expand_dims(axes = var_4392_axes_0, x = var_4390_cast_fp16)[name = string("op_4392_cast_fp16")];
	string input_245_pad_type_0 = const()[name = string("input_245_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> input_245_strides_0 = const()[name = string("input_245_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> input_245_pad_0 = const()[name = string("input_245_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> input_245_dilations_0 = const()[name = string("input_245_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 input_245_groups_0 = const()[name = string("input_245_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> input_245 = conv(dilations = input_245_dilations_0, groups = input_245_groups_0, pad = input_245_pad_0, pad_type = input_245_pad_type_0, strides = input_245_strides_0, weight = layers_15_feed_forward_w1_weight, x = var_4392_cast_fp16)[name = string("input_245")];
	string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> b_strides_0 = const()[name = string("b_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> b_pad_0 = const()[name = string("b_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4608, 1, 1]> b = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = layers_15_feed_forward_w3_weight, x = var_4392_cast_fp16)[name = string("b")];
	tensor<fp16, [1, 4608, 1, 1]> var_4420 = silu(x = input_245)[name = string("op_4420")];
	tensor<fp16, [1, 4608, 1, 1]> input_247 = mul(x = var_4420, y = b)[name = string("input_247")];
	string mlp_61_pad_type_0 = const()[name = string("mlp_61_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_61_strides_0 = const()[name = string("mlp_61_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_61_pad_0 = const()[name = string("mlp_61_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_61_dilations_0 = const()[name = string("mlp_61_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_61_groups_0 = const()[name = string("mlp_61_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1024, 1, 1]> mlp_61 = conv(dilations = mlp_61_dilations_0, groups = mlp_61_groups_0, pad = mlp_61_pad_0, pad_type = mlp_61_pad_type_0, strides = mlp_61_strides_0, weight = layers_15_feed_forward_w2_weight, x = input_247)[name = string("mlp_61")];
	tensor<int32, [1]> var_4434_axes_0 = const()[name = string("op_4434_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_4434 = squeeze(axes = var_4434_axes_0, x = mlp_61)[name = string("op_4434")];
	tensor<int32, [3]> var_4438 = const()[name = string("op_4438"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 1, 1024]> mlp = transpose(perm = var_4438, x = var_4434)[name = string("transpose_32")];
	tensor<fp16, [1, 1, 1024]> x_cast_fp16 = add(x = x_87_cast_fp16, y = mlp)[name = string("x_cast_fp16")];
	fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1024]> var_4442_cast_fp16 = mul(x = x_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_4442_cast_fp16")];
	int32 var_4444 = const()[name = string("op_4444"), val = int32(-1)];
	bool input_249_interleave_0 = const()[name = string("input_249_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 2048]> input_249_cast_fp16 = concat(axis = var_4444, interleave = input_249_interleave_0, values = (x_cast_fp16, var_4442_cast_fp16))[name = string("input_249_cast_fp16")];
	tensor<int32, [1]> normed_axes_0 = const()[name = string("normed_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4450_to_fp16 = const()[name = string("op_4450_to_fp16"), val = fp16(0x1.5p-17)];
	tensor<fp16, [1, 1, 2048]> normed_cast_fp16 = layer_norm(axes = normed_axes_0, epsilon = var_4450_to_fp16, x = input_249_cast_fp16)[name = string("normed_cast_fp16")];
	tensor<int32, [2]> var_4453_split_sizes_0 = const()[name = string("op_4453_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
	int32 var_4453_axis_0 = const()[name = string("op_4453_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1024]> var_4453_cast_fp16_0, tensor<fp16, [1, 1, 1024]> var_4453_cast_fp16_1 = split(axis = var_4453_axis_0, split_sizes = var_4453_split_sizes_0, x = normed_cast_fp16)[name = string("op_4453_cast_fp16")];
	tensor<fp16, [1024]> embedding_norm_weight_promoted_to_fp16 = const()[name = string("embedding_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(844241856)))];
	tensor<fp16, [1, 1, 1024]> hidden_states_cast_fp16 = mul(x = var_4453_cast_fp16_0, y = embedding_norm_weight_promoted_to_fp16)[name = string("hidden_states_cast_fp16")];
	tensor<int32, [3]> var_4459 = const()[name = string("op_4459"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_4462_axes_0 = const()[name = string("op_4462_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1024, 1]> var_4460_cast_fp16 = transpose(perm = var_4459, x = hidden_states_cast_fp16)[name = string("transpose_31")];
	tensor<fp16, [1, 1024, 1, 1]> var_4462_cast_fp16 = expand_dims(axes = var_4462_axes_0, x = var_4460_cast_fp16)[name = string("op_4462_cast_fp16")];
	string var_4478_pad_type_0 = const()[name = string("op_4478_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_4478_strides_0 = const()[name = string("op_4478_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_4478_pad_0 = const()[name = string("op_4478_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_4478_dilations_0 = const()[name = string("op_4478_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_4478_groups_0 = const()[name = string("op_4478_groups_0"), val = int32(1)];
	tensor<fp16, [1, 65536, 1, 1]> var_4478 = conv(dilations = var_4478_dilations_0, groups = var_4478_groups_0, pad = var_4478_pad_0, pad_type = var_4478_pad_type_0, strides = var_4478_strides_0, weight = lm_head_weight, x = var_4462_cast_fp16)[name = string("op_4478")];
	tensor<int32, [1]> var_4480_axes_0 = const()[name = string("op_4480_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 65536, 1]> var_4480 = squeeze(axes = var_4480_axes_0, x = var_4478)[name = string("op_4480")];
	tensor<int32, [3]> var_4484 = const()[name = string("op_4484"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> logits_2d_axes_0 = const()[name = string("logits_2d_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1, 65536]> logits = transpose(perm = var_4484, x = var_4480)[name = string("transpose_30")];
	tensor<fp16, [1, 65536]> logits_2d = squeeze(axes = logits_2d_axes_0, x = logits)[name = string("logits_2d")];
	int32 token_id_axis_0 = const()[name = string("token_id_axis_0"), val = int32(-1)];
	bool token_id_keep_dims_0 = const()[name = string("token_id_keep_dims_0"), val = bool(false)];
	string token_id_output_dtype_0 = const()[name = string("token_id_output_dtype_0"), val = string("int32")];
	tensor<int32, [1]> token_id = reduce_argmax(axis = token_id_axis_0, keep_dims = token_id_keep_dims_0, output_dtype = token_id_output_dtype_0, x = logits_2d)[name = string("token_id")];
	tensor<int32, [1]> var_4492_axes_0 = const()[name = string("op_4492_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<int32, [1, 1]> var_4492 = expand_dims(axes = var_4492_axes_0, x = token_id)[name = string("op_4492")];
	int32 var_4493 = const()[name = string("op_4493"), val = int32(-1)];
	bool var_4495_validate_indices_0 = const()[name = string("op_4495_validate_indices_0"), val = bool(false)];
	tensor<fp16, [1, 1]> var_4495 = gather_along_axis(axis = var_4493, indices = var_4492, validate_indices = var_4495_validate_indices_0, x = logits_2d)[name = string("op_4495")];
	tensor<int32, [1]> var_4497_axes_0 = const()[name = string("op_4497_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [1]> token_logit = squeeze(axes = var_4497_axes_0, x = var_4495)[name = string("op_4497")];
	int32 var_4500_axis_0 = const()[name = string("op_4500_axis_0"), val = int32(0)];
	tensor<fp16, [10, 1024, 3]> conv_state_out = stack(axis = var_4500_axis_0, values = (var_693_cast_fp16, var_856_cast_fp16, var_1382_cast_fp16, var_1545_cast_fp16, var_2071_cast_fp16, var_2234_cast_fp16, var_2760_cast_fp16, var_3286_cast_fp16, var_3812_cast_fp16, new_slot_cast_fp16))[name = string("op_4500_cast_fp16")];
	} -> (token_id, token_logit, conv_state_out);
	}