Upload swa/chunk3.mlmodelc/model.mil with huggingface_hub

0ce7b97 verified 12 days ago

330 kB

	program(1.3)
	[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})]
	{
	func main<ios18>(tensor<fp16, [1, 1, 1, 2048]> causal_mask_full, tensor<fp16, [1, 1, 1, 512]> causal_mask_sliding, tensor<fp16, [1, 1, 1, 512]> cos_f, tensor<fp16, [1, 1, 1, 256]> cos_s, tensor<fp16, [1, 1, 1536]> hidden_states, tensor<fp16, [1, 1, 512, 256]> kv13_k, tensor<fp16, [1, 1, 512, 256]> kv13_v, tensor<fp16, [1, 1, 2048, 512]> kv14_k, tensor<fp16, [1, 1, 2048, 512]> kv14_v, tensor<fp16, [1, 1, 8960]> per_layer_combined, tensor<fp16, [1, 1, 1, 512]> sin_f, tensor<fp16, [1, 1, 1, 256]> sin_s, tensor<fp16, [1, 1, 2048, 1]> update_mask) {
	tensor<fp16, [2048, 1536, 1, 1]> layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1572992))))[name = string("layers_0_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [256]> layers_0_self_attn_q_norm_weight = const()[name = string("layers_0_self_attn_q_norm_weight"), val = tensor<fp16, [256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1575104)))];
	tensor<fp16, [12288, 1536, 1, 1]> layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1575680))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11012928))))[name = string("layers_0_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11025280))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20462528))))[name = string("layers_0_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20474880))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29912128))))[name = string("layers_0_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_0_post_feedforward_layernorm_weight = const()[name = string("layers_0_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29913728)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_0_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29916864))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30113536))))[name = string("layers_0_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30113856))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31686784))))[name = string("layers_1_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31688896))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41126144))))[name = string("layers_1_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41138496))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50575744))))[name = string("layers_1_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50588096))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60025344))))[name = string("layers_1_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_1_post_feedforward_layernorm_weight = const()[name = string("layers_1_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60026944)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_1_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60030080))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60226752))))[name = string("layers_1_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60227072))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61800000))))[name = string("layers_2_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61802112))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71239360))))[name = string("layers_2_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71251712))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80688960))))[name = string("layers_2_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80701312))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90138560))))[name = string("layers_2_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_2_post_feedforward_layernorm_weight = const()[name = string("layers_2_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90140160)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_2_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90143296))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90339968))))[name = string("layers_2_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90340288))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91913216))))[name = string("layers_3_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91915328))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101352576))))[name = string("layers_3_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101364928))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110802176))))[name = string("layers_3_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110814528))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120251776))))[name = string("layers_3_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_3_post_feedforward_layernorm_weight = const()[name = string("layers_3_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120253376)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_3_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120256512))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120453184))))[name = string("layers_3_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [4096, 1536, 1, 1]> layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [4096, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120453504))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123599296))))[name = string("layers_4_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [512]> layers_4_self_attn_q_norm_weight = const()[name = string("layers_4_self_attn_q_norm_weight"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123603456)))];
	tensor<fp16, [12288, 1536, 1, 1]> layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123604544))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133041792))))[name = string("layers_4_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133054144))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142491392))))[name = string("layers_4_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142503744))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151940992))))[name = string("layers_4_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_4_post_feedforward_layernorm_weight = const()[name = string("layers_4_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151942592)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_4_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151945728))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152142400))))[name = string("layers_4_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152142720))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153715648))))[name = string("layers_5_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153717760))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163155008))))[name = string("layers_5_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163167360))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172604608))))[name = string("layers_5_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172616960))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182054208))))[name = string("layers_5_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_5_post_feedforward_layernorm_weight = const()[name = string("layers_5_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182055808)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_5_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182058944))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182255616))))[name = string("layers_5_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182255936))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183828864))))[name = string("layers_6_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183830976))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193268224))))[name = string("layers_6_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193280576))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202717824))))[name = string("layers_6_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202730176))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212167424))))[name = string("layers_6_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_6_post_feedforward_layernorm_weight = const()[name = string("layers_6_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212169024)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_6_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212172160))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212368832))))[name = string("layers_6_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212369152))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213942080))))[name = string("layers_7_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213944192))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223381440))))[name = string("layers_7_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223393792))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232831040))))[name = string("layers_7_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232843392))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242280640))))[name = string("layers_7_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_7_post_feedforward_layernorm_weight = const()[name = string("layers_7_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242282240)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_7_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242285376))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242482048))))[name = string("layers_7_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242482368))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244055296))))[name = string("layers_8_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244057408))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253494656))))[name = string("layers_8_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253507008))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262944256))))[name = string("layers_8_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262956608))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272393856))))[name = string("layers_8_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_8_post_feedforward_layernorm_weight = const()[name = string("layers_8_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272395456)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_8_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272398592))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272595264))))[name = string("layers_8_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [4096, 1536, 1, 1]> layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [4096, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272595584))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275741376))))[name = string("layers_9_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275745536))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285182784))))[name = string("layers_9_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285195136))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294632384))))[name = string("layers_9_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294644736))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304081984))))[name = string("layers_9_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_9_post_feedforward_layernorm_weight = const()[name = string("layers_9_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304083584)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_9_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304086720))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304283392))))[name = string("layers_9_per_layer_input_gate_weight_palettized")];
	int32 var_498 = const()[name = string("op_498"), val = int32(-1)];
	fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_500_cast_fp16 = mul(x = hidden_states, y = const_0_promoted_to_fp16)[name = string("op_500_cast_fp16")];
	bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_1_cast_fp16 = concat(axis = var_498, interleave = input_1_interleave_0, values = (hidden_states, var_500_cast_fp16))[name = string("input_1_cast_fp16")];
	tensor<int32, [1]> normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_495_to_fp16 = const()[name = string("op_495_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_495_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")];
	tensor<int32, [2]> var_505_split_sizes_0 = const()[name = string("op_505_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_505_axis_0 = const()[name = string("op_505_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_505_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_505_cast_fp16_1 = split(axis = var_505_axis_0, split_sizes = var_505_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_505_cast_fp16")];
	tensor<fp16, [1536]> layers_0_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304283712)))];
	tensor<fp16, [1, 1, 1536]> h_1_cast_fp16 = mul(x = var_505_cast_fp16_0, y = layers_0_input_layernorm_weight_promoted_to_fp16)[name = string("h_1_cast_fp16")];
	tensor<int32, [3]> var_511 = const()[name = string("op_511"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_514_axes_0 = const()[name = string("op_514_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_512_cast_fp16 = transpose(perm = var_511, x = h_1_cast_fp16)[name = string("transpose_111")];
	tensor<fp16, [1, 1536, 1, 1]> var_514_cast_fp16 = expand_dims(axes = var_514_axes_0, x = var_512_cast_fp16)[name = string("op_514_cast_fp16")];
	string var_530_pad_type_0 = const()[name = string("op_530_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_530_strides_0 = const()[name = string("op_530_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_530_pad_0 = const()[name = string("op_530_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_530_dilations_0 = const()[name = string("op_530_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_530_groups_0 = const()[name = string("op_530_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_530 = conv(dilations = var_530_dilations_0, groups = var_530_groups_0, pad = var_530_pad_0, pad_type = var_530_pad_type_0, strides = var_530_strides_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = var_514_cast_fp16)[name = string("op_530")];
	tensor<int32, [4]> var_535 = const()[name = string("op_535"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_536 = reshape(shape = var_535, x = var_530)[name = string("op_536")];
	tensor<int32, [4]> var_541 = const()[name = string("op_541"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_551 = const()[name = string("op_551"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_542 = transpose(perm = var_541, x = var_536)[name = string("transpose_110")];
	tensor<fp16, [1, 8, 256]> x_1 = reshape(shape = var_551, x = var_542)[name = string("x_1")];
	int32 var_557 = const()[name = string("op_557"), val = int32(-1)];
	fp16 const_1_promoted = const()[name = string("const_1_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_559 = mul(x = x_1, y = const_1_promoted)[name = string("op_559")];
	bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_5 = concat(axis = var_557, interleave = input_5_interleave_0, values = (x_1, var_559))[name = string("input_5")];
	tensor<int32, [1]> normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_554_to_fp16 = const()[name = string("op_554_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_554_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")];
	tensor<int32, [2]> var_564_split_sizes_0 = const()[name = string("op_564_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_564_axis_0 = const()[name = string("op_564_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_564_0, tensor<fp16, [1, 8, 256]> var_564_1 = split(axis = var_564_axis_0, split_sizes = var_564_split_sizes_0, x = normed_5_cast_fp16)[name = string("op_564")];
	tensor<fp16, [1, 8, 256]> var_566 = mul(x = var_564_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_566")];
	tensor<int32, [4]> var_571 = const()[name = string("op_571"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_3 = reshape(shape = var_571, x = var_566)[name = string("q_3")];
	tensor<fp16, [1, 8, 1, 256]> var_573_cast_fp16 = mul(x = q_3, y = cos_s)[name = string("op_573_cast_fp16")];
	tensor<int32, [2]> var_574_split_sizes_0 = const()[name = string("op_574_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_574_axis_0 = const()[name = string("op_574_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_574_0, tensor<fp16, [1, 8, 1, 128]> var_574_1 = split(axis = var_574_axis_0, split_sizes = var_574_split_sizes_0, x = q_3)[name = string("op_574")];
	fp16 const_2_promoted = const()[name = string("const_2_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_576 = mul(x = var_574_1, y = const_2_promoted)[name = string("op_576")];
	int32 var_578 = const()[name = string("op_578"), val = int32(-1)];
	bool var_579_interleave_0 = const()[name = string("op_579_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_579 = concat(axis = var_578, interleave = var_579_interleave_0, values = (var_576, var_574_0))[name = string("op_579")];
	tensor<fp16, [1, 8, 1, 256]> var_580_cast_fp16 = mul(x = var_579, y = sin_s)[name = string("op_580_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_5_cast_fp16 = add(x = var_573_cast_fp16, y = var_580_cast_fp16)[name = string("q_5_cast_fp16")];
	tensor<int32, [4]> transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_0_reps_0 = const()[name = string("tile_0_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])];
	tensor<fp16, [1, 1, 512, 256]> transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = kv13_k)[name = string("transpose_109")];
	tensor<fp16, [8, 1, 512, 256]> tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = string("tile_0_cast_fp16")];
	tensor<int32, [5]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [5]>([8, 1, 1, 512, 256])];
	tensor<fp16, [8, 1, 1, 512, 256]> reshape_0_cast_fp16 = reshape(shape = concat_0, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")];
	tensor<int32, [5]> transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [4]>([-1, 1, 512, 256])];
	tensor<fp16, [1, 8, 1, 512, 256]> transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_108")];
	tensor<fp16, [8, 1, 512, 256]> reshape_1_cast_fp16 = reshape(shape = concat_1, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")];
	tensor<int32, [4]> transpose_40_perm_0 = const()[name = string("transpose_40_perm_0"), val = tensor<int32, [4]>([1, 0, -1, -2])];
	tensor<int32, [4]> transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])];
	tensor<fp16, [1, 1, 512, 256]> transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = kv13_v)[name = string("transpose_107")];
	tensor<fp16, [8, 1, 512, 256]> tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")];
	tensor<int32, [5]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [5]>([8, 1, 1, 512, 256])];
	tensor<fp16, [8, 1, 1, 512, 256]> reshape_2_cast_fp16 = reshape(shape = concat_2, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")];
	tensor<int32, [5]> transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [4]>([-1, 1, 512, 256])];
	tensor<fp16, [1, 8, 1, 512, 256]> transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_106")];
	tensor<fp16, [8, 1, 512, 256]> reshape_3_cast_fp16 = reshape(shape = concat_3, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")];
	tensor<int32, [4]> V_expanded_1_perm_0 = const()[name = string("V_expanded_1_perm_0"), val = tensor<int32, [4]>([1, 0, -2, -1])];
	bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)];
	bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 256, 512]> transpose_40_cast_fp16 = transpose(perm = transpose_40_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_105")];
	tensor<fp16, [1, 8, 1, 512]> attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = q_5_cast_fp16, y = transpose_40_cast_fp16)[name = string("attn_weights_1_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> x_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask_sliding)[name = string("x_3_cast_fp16")];
	tensor<int32, [1]> reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> reduce_max_0 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_3_cast_fp16)[name = string("reduce_max_0")];
	tensor<fp16, [1, 8, 1, 512]> var_612 = sub(x = x_3_cast_fp16, y = reduce_max_0)[name = string("op_612")];
	tensor<fp16, [1, 8, 1, 512]> var_618 = exp(x = var_612)[name = string("op_618")];
	tensor<int32, [1]> var_628_axes_0 = const()[name = string("op_628_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_628_keep_dims_0 = const()[name = string("op_628_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> var_628 = reduce_sum(axes = var_628_axes_0, keep_dims = var_628_keep_dims_0, x = var_618)[name = string("op_628")];
	tensor<fp16, [1, 8, 1, 512]> var_634_cast_fp16 = real_div(x = var_618, y = var_628)[name = string("op_634_cast_fp16")];
	bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)];
	bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> V_expanded_1_cast_fp16 = transpose(perm = V_expanded_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_104")];
	tensor<fp16, [1, 8, 1, 256]> attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = var_634_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_1_cast_fp16")];
	tensor<int32, [4]> var_645 = const()[name = string("op_645"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_652 = const()[name = string("op_652"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_646_cast_fp16 = transpose(perm = var_645, x = attn_output_1_cast_fp16)[name = string("transpose_103")];
	tensor<fp16, [1, 1, 2048]> attn_output_3_cast_fp16 = reshape(shape = var_652, x = var_646_cast_fp16)[name = string("attn_output_3_cast_fp16")];
	tensor<int32, [3]> var_657 = const()[name = string("op_657"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_673_pad_type_0 = const()[name = string("op_673_pad_type_0"), val = string("valid")];
	int32 var_673_groups_0 = const()[name = string("op_673_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_673_strides_0 = const()[name = string("op_673_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_673_pad_0 = const()[name = string("op_673_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_673_dilations_0 = const()[name = string("op_673_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304286848))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305859776))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 2048, 1]> var_658_cast_fp16 = transpose(perm = var_657, x = attn_output_3_cast_fp16)[name = string("transpose_102")];
	tensor<fp16, [1, 1536, 1]> var_673_cast_fp16 = conv(dilations = var_673_dilations_0, groups = var_673_groups_0, pad = var_673_pad_0, pad_type = var_673_pad_type_0, strides = var_673_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_658_cast_fp16)[name = string("op_673_cast_fp16")];
	tensor<int32, [3]> var_677 = const()[name = string("op_677"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_683 = const()[name = string("op_683"), val = int32(-1)];
	fp16 const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_7_cast_fp16 = transpose(perm = var_677, x = var_673_cast_fp16)[name = string("transpose_101")];
	tensor<fp16, [1, 1, 1536]> var_685_cast_fp16 = mul(x = x_7_cast_fp16, y = const_3_promoted_to_fp16)[name = string("op_685_cast_fp16")];
	bool input_9_interleave_0 = const()[name = string("input_9_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_9_cast_fp16 = concat(axis = var_683, interleave = input_9_interleave_0, values = (x_7_cast_fp16, var_685_cast_fp16))[name = string("input_9_cast_fp16")];
	tensor<int32, [1]> normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_680_to_fp16 = const()[name = string("op_680_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_680_to_fp16, x = input_9_cast_fp16)[name = string("normed_9_cast_fp16")];
	tensor<int32, [2]> var_690_split_sizes_0 = const()[name = string("op_690_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_690_axis_0 = const()[name = string("op_690_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_690_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_690_cast_fp16_1 = split(axis = var_690_axis_0, split_sizes = var_690_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_690_cast_fp16")];
	tensor<fp16, [1536]> layers_0_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305861376)))];
	tensor<fp16, [1, 1, 1536]> attn_output_5_cast_fp16 = mul(x = var_690_cast_fp16_0, y = layers_0_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_5_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_9_cast_fp16 = add(x = hidden_states, y = attn_output_5_cast_fp16)[name = string("x_9_cast_fp16")];
	int32 var_699 = const()[name = string("op_699"), val = int32(-1)];
	fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_701_cast_fp16 = mul(x = x_9_cast_fp16, y = const_4_promoted_to_fp16)[name = string("op_701_cast_fp16")];
	bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_11_cast_fp16 = concat(axis = var_699, interleave = input_11_interleave_0, values = (x_9_cast_fp16, var_701_cast_fp16))[name = string("input_11_cast_fp16")];
	tensor<int32, [1]> normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_696_to_fp16 = const()[name = string("op_696_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_696_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")];
	tensor<int32, [2]> var_706_split_sizes_0 = const()[name = string("op_706_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_706_axis_0 = const()[name = string("op_706_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_706_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_706_cast_fp16_1 = split(axis = var_706_axis_0, split_sizes = var_706_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_706_cast_fp16")];
	tensor<fp16, [1536]> layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305864512)))];
	tensor<fp16, [1, 1, 1536]> h_3_cast_fp16 = mul(x = var_706_cast_fp16_0, y = layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_3_cast_fp16")];
	tensor<int32, [3]> var_717 = const()[name = string("op_717"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_718 = transpose(perm = var_717, x = h_3_cast_fp16)[name = string("transpose_100")];
	tensor<fp16, [1, 1536, 1, 1]> input_13 = expand_dims(axes = input_13_axes_0, x = var_718)[name = string("input_13")];
	string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_13)[name = string("gate_1")];
	string up_1_pad_type_0 = const()[name = string("up_1_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_1_dilations_0 = const()[name = string("up_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> up_1 = conv(dilations = up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = up_1_strides_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_13)[name = string("up_1")];
	string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")];
	tensor<fp16, [1, 12288, 1, 1]> input_15 = mul(x = gate_3, y = up_1)[name = string("input_15")];
	string mlp_out_1_pad_type_0 = const()[name = string("mlp_out_1_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_1_strides_0 = const()[name = string("mlp_out_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_1_pad_0 = const()[name = string("mlp_out_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_1_dilations_0 = const()[name = string("mlp_out_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_1_groups_0 = const()[name = string("mlp_out_1_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> mlp_out_1 = conv(dilations = mlp_out_1_dilations_0, groups = mlp_out_1_groups_0, pad = mlp_out_1_pad_0, pad_type = mlp_out_1_pad_type_0, strides = mlp_out_1_strides_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_15)[name = string("mlp_out_1")];
	tensor<int32, [1]> var_758_axes_0 = const()[name = string("op_758_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_758 = squeeze(axes = var_758_axes_0, x = mlp_out_1)[name = string("op_758")];
	tensor<int32, [3]> var_762 = const()[name = string("op_762"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_768 = const()[name = string("op_768"), val = int32(-1)];
	fp16 const_5_promoted = const()[name = string("const_5_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_11 = transpose(perm = var_762, x = var_758)[name = string("transpose_99")];
	tensor<fp16, [1, 1, 1536]> var_770 = mul(x = x_11, y = const_5_promoted)[name = string("op_770")];
	bool input_17_interleave_0 = const()[name = string("input_17_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_17 = concat(axis = var_768, interleave = input_17_interleave_0, values = (x_11, var_770))[name = string("input_17")];
	tensor<int32, [1]> normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_765_to_fp16 = const()[name = string("op_765_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_765_to_fp16, x = input_17)[name = string("normed_17_cast_fp16")];
	tensor<int32, [2]> var_775_split_sizes_0 = const()[name = string("op_775_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_775_axis_0 = const()[name = string("op_775_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_775_0, tensor<fp16, [1, 1, 1536]> var_775_1 = split(axis = var_775_axis_0, split_sizes = var_775_split_sizes_0, x = normed_17_cast_fp16)[name = string("op_775")];
	tensor<fp16, [1, 1, 1536]> hidden_states_3 = mul(x = var_775_0, y = layers_0_post_feedforward_layernorm_weight)[name = string("hidden_states_3")];
	tensor<fp16, [1, 1, 1536]> hidden_states_5_cast_fp16 = add(x = x_9_cast_fp16, y = hidden_states_3)[name = string("hidden_states_5_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_1_begin_0 = const()[name = string("per_layer_slice_1_begin_0"), val = tensor<int32, [3]>([0, 0, 3840])];
	tensor<int32, [3]> per_layer_slice_1_end_0 = const()[name = string("per_layer_slice_1_end_0"), val = tensor<int32, [3]>([1, 1, 4096])];
	tensor<bool, [3]> per_layer_slice_1_end_mask_0 = const()[name = string("per_layer_slice_1_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> per_layer_slice_1_cast_fp16 = slice_by_index(begin = per_layer_slice_1_begin_0, end = per_layer_slice_1_end_0, end_mask = per_layer_slice_1_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_1_cast_fp16")];
	tensor<int32, [3]> var_803 = const()[name = string("op_803"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_19_axes_0 = const()[name = string("input_19_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_804 = transpose(perm = var_803, x = hidden_states_5_cast_fp16)[name = string("transpose_98")];
	tensor<fp16, [1, 1536, 1, 1]> input_19 = expand_dims(axes = input_19_axes_0, x = var_804)[name = string("input_19")];
	string gated_1_pad_type_0 = const()[name = string("gated_1_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_1_strides_0 = const()[name = string("gated_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_1_pad_0 = const()[name = string("gated_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_1_dilations_0 = const()[name = string("gated_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_1_groups_0 = const()[name = string("gated_1_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 1]> gated_1 = conv(dilations = gated_1_dilations_0, groups = gated_1_groups_0, pad = gated_1_pad_0, pad_type = gated_1_pad_type_0, strides = gated_1_strides_0, weight = layers_0_per_layer_input_gate_weight_palettized, x = input_19)[name = string("gated_1")];
	string gated_3_mode_0 = const()[name = string("gated_3_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 1]> gated_3 = gelu(mode = gated_3_mode_0, x = gated_1)[name = string("gated_3")];
	tensor<int32, [3]> var_823 = const()[name = string("op_823"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_1_axes_0 = const()[name = string("per_layer_slice_conv_1_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 1]> var_824_cast_fp16 = transpose(perm = var_823, x = per_layer_slice_1_cast_fp16)[name = string("transpose_97")];
	tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_1_cast_fp16 = expand_dims(axes = per_layer_slice_conv_1_axes_0, x = var_824_cast_fp16)[name = string("per_layer_slice_conv_1_cast_fp16")];
	tensor<fp16, [1, 256, 1, 1]> input_21_cast_fp16 = mul(x = gated_3, y = per_layer_slice_conv_1_cast_fp16)[name = string("input_21_cast_fp16")];
	string gated_5_pad_type_0 = const()[name = string("gated_5_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_5_strides_0 = const()[name = string("gated_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_5_pad_0 = const()[name = string("gated_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_5_dilations_0 = const()[name = string("gated_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_5_groups_0 = const()[name = string("gated_5_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_0_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305867648))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306064320))))[name = string("layers_0_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 1]> gated_5_cast_fp16 = conv(dilations = gated_5_dilations_0, groups = gated_5_groups_0, pad = gated_5_pad_0, pad_type = gated_5_pad_type_0, strides = gated_5_strides_0, weight = layers_0_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_21_cast_fp16)[name = string("gated_5_cast_fp16")];
	tensor<int32, [1]> var_840_axes_0 = const()[name = string("op_840_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_840_cast_fp16 = squeeze(axes = var_840_axes_0, x = gated_5_cast_fp16)[name = string("op_840_cast_fp16")];
	tensor<int32, [3]> var_844 = const()[name = string("op_844"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_850 = const()[name = string("op_850"), val = int32(-1)];
	fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_13_cast_fp16 = transpose(perm = var_844, x = var_840_cast_fp16)[name = string("transpose_96")];
	tensor<fp16, [1, 1, 1536]> var_852_cast_fp16 = mul(x = x_13_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_852_cast_fp16")];
	bool input_23_interleave_0 = const()[name = string("input_23_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_23_cast_fp16 = concat(axis = var_850, interleave = input_23_interleave_0, values = (x_13_cast_fp16, var_852_cast_fp16))[name = string("input_23_cast_fp16")];
	tensor<int32, [1]> normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_847_to_fp16 = const()[name = string("op_847_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_847_to_fp16, x = input_23_cast_fp16)[name = string("normed_21_cast_fp16")];
	tensor<int32, [2]> var_857_split_sizes_0 = const()[name = string("op_857_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_857_axis_0 = const()[name = string("op_857_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_857_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_857_cast_fp16_1 = split(axis = var_857_axis_0, split_sizes = var_857_split_sizes_0, x = normed_21_cast_fp16)[name = string("op_857_cast_fp16")];
	tensor<fp16, [1536]> layers_0_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306065920)))];
	tensor<fp16, [1, 1, 1536]> hidden_states_9_cast_fp16 = mul(x = var_857_cast_fp16_0, y = layers_0_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_9_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_11_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")];
	tensor<fp16, [1]> const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.04p-2])];
	tensor<fp16, [1, 1, 1536]> x_15_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_7_promoted_to_fp16)[name = string("x_15_cast_fp16")];
	int32 var_872 = const()[name = string("op_872"), val = int32(-1)];
	fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_874_cast_fp16 = mul(x = x_15_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_874_cast_fp16")];
	bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_25_cast_fp16 = concat(axis = var_872, interleave = input_25_interleave_0, values = (x_15_cast_fp16, var_874_cast_fp16))[name = string("input_25_cast_fp16")];
	tensor<int32, [1]> normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_869_to_fp16 = const()[name = string("op_869_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_869_to_fp16, x = input_25_cast_fp16)[name = string("normed_25_cast_fp16")];
	tensor<int32, [2]> var_879_split_sizes_0 = const()[name = string("op_879_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_879_axis_0 = const()[name = string("op_879_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_879_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_879_cast_fp16_1 = split(axis = var_879_axis_0, split_sizes = var_879_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_879_cast_fp16")];
	tensor<fp16, [1536]> layers_1_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306069056)))];
	tensor<fp16, [1, 1, 1536]> h_7_cast_fp16 = mul(x = var_879_cast_fp16_0, y = layers_1_input_layernorm_weight_promoted_to_fp16)[name = string("h_7_cast_fp16")];
	tensor<int32, [3]> var_885 = const()[name = string("op_885"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_888_axes_0 = const()[name = string("op_888_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_886_cast_fp16 = transpose(perm = var_885, x = h_7_cast_fp16)[name = string("transpose_95")];
	tensor<fp16, [1, 1536, 1, 1]> var_888_cast_fp16 = expand_dims(axes = var_888_axes_0, x = var_886_cast_fp16)[name = string("op_888_cast_fp16")];
	string var_904_pad_type_0 = const()[name = string("op_904_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_904_strides_0 = const()[name = string("op_904_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_904_pad_0 = const()[name = string("op_904_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_904_dilations_0 = const()[name = string("op_904_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_904_groups_0 = const()[name = string("op_904_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_904 = conv(dilations = var_904_dilations_0, groups = var_904_groups_0, pad = var_904_pad_0, pad_type = var_904_pad_type_0, strides = var_904_strides_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = var_888_cast_fp16)[name = string("op_904")];
	tensor<int32, [4]> var_909 = const()[name = string("op_909"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_910 = reshape(shape = var_909, x = var_904)[name = string("op_910")];
	tensor<int32, [4]> var_915 = const()[name = string("op_915"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_925 = const()[name = string("op_925"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_916 = transpose(perm = var_915, x = var_910)[name = string("transpose_94")];
	tensor<fp16, [1, 8, 256]> x_17 = reshape(shape = var_925, x = var_916)[name = string("x_17")];
	int32 var_931 = const()[name = string("op_931"), val = int32(-1)];
	fp16 const_9_promoted = const()[name = string("const_9_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_933 = mul(x = x_17, y = const_9_promoted)[name = string("op_933")];
	bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_29 = concat(axis = var_931, interleave = input_29_interleave_0, values = (x_17, var_933))[name = string("input_29")];
	tensor<int32, [1]> normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_928_to_fp16 = const()[name = string("op_928_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_928_to_fp16, x = input_29)[name = string("normed_29_cast_fp16")];
	tensor<int32, [2]> var_938_split_sizes_0 = const()[name = string("op_938_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_938_axis_0 = const()[name = string("op_938_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_938_0, tensor<fp16, [1, 8, 256]> var_938_1 = split(axis = var_938_axis_0, split_sizes = var_938_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_938")];
	tensor<fp16, [1, 8, 256]> var_940 = mul(x = var_938_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_940")];
	tensor<int32, [4]> var_945 = const()[name = string("op_945"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_9 = reshape(shape = var_945, x = var_940)[name = string("q_9")];
	tensor<fp16, [1, 8, 1, 256]> var_947_cast_fp16 = mul(x = q_9, y = cos_s)[name = string("op_947_cast_fp16")];
	tensor<int32, [2]> var_948_split_sizes_0 = const()[name = string("op_948_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_948_axis_0 = const()[name = string("op_948_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_948_0, tensor<fp16, [1, 8, 1, 128]> var_948_1 = split(axis = var_948_axis_0, split_sizes = var_948_split_sizes_0, x = q_9)[name = string("op_948")];
	fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_950 = mul(x = var_948_1, y = const_10_promoted)[name = string("op_950")];
	int32 var_952 = const()[name = string("op_952"), val = int32(-1)];
	bool var_953_interleave_0 = const()[name = string("op_953_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_953 = concat(axis = var_952, interleave = var_953_interleave_0, values = (var_950, var_948_0))[name = string("op_953")];
	tensor<fp16, [1, 8, 1, 256]> var_954_cast_fp16 = mul(x = var_953, y = sin_s)[name = string("op_954_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_11_cast_fp16 = add(x = var_947_cast_fp16, y = var_954_cast_fp16)[name = string("q_11_cast_fp16")];
	bool attn_weights_5_transpose_x_0 = const()[name = string("attn_weights_5_transpose_x_0"), val = bool(false)];
	bool attn_weights_5_transpose_y_0 = const()[name = string("attn_weights_5_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = q_11_cast_fp16, y = transpose_40_cast_fp16)[name = string("attn_weights_5_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> x_19_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask_sliding)[name = string("x_19_cast_fp16")];
	tensor<int32, [1]> reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> reduce_max_1 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_19_cast_fp16)[name = string("reduce_max_1")];
	tensor<fp16, [1, 8, 1, 512]> var_986 = sub(x = x_19_cast_fp16, y = reduce_max_1)[name = string("op_986")];
	tensor<fp16, [1, 8, 1, 512]> var_992 = exp(x = var_986)[name = string("op_992")];
	tensor<int32, [1]> var_1002_axes_0 = const()[name = string("op_1002_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_1002_keep_dims_0 = const()[name = string("op_1002_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> var_1002 = reduce_sum(axes = var_1002_axes_0, keep_dims = var_1002_keep_dims_0, x = var_992)[name = string("op_1002")];
	tensor<fp16, [1, 8, 1, 512]> var_1008_cast_fp16 = real_div(x = var_992, y = var_1002)[name = string("op_1008_cast_fp16")];
	bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)];
	bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = var_1008_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_7_cast_fp16")];
	tensor<int32, [4]> var_1019 = const()[name = string("op_1019"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_1026 = const()[name = string("op_1026"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_1020_cast_fp16 = transpose(perm = var_1019, x = attn_output_7_cast_fp16)[name = string("transpose_93")];
	tensor<fp16, [1, 1, 2048]> attn_output_9_cast_fp16 = reshape(shape = var_1026, x = var_1020_cast_fp16)[name = string("attn_output_9_cast_fp16")];
	tensor<int32, [3]> var_1031 = const()[name = string("op_1031"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_1047_pad_type_0 = const()[name = string("op_1047_pad_type_0"), val = string("valid")];
	int32 var_1047_groups_0 = const()[name = string("op_1047_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_1047_strides_0 = const()[name = string("op_1047_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_1047_pad_0 = const()[name = string("op_1047_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_1047_dilations_0 = const()[name = string("op_1047_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306072192))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307645120))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 2048, 1]> var_1032_cast_fp16 = transpose(perm = var_1031, x = attn_output_9_cast_fp16)[name = string("transpose_92")];
	tensor<fp16, [1, 1536, 1]> var_1047_cast_fp16 = conv(dilations = var_1047_dilations_0, groups = var_1047_groups_0, pad = var_1047_pad_0, pad_type = var_1047_pad_type_0, strides = var_1047_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_1032_cast_fp16)[name = string("op_1047_cast_fp16")];
	tensor<int32, [3]> var_1051 = const()[name = string("op_1051"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1057 = const()[name = string("op_1057"), val = int32(-1)];
	fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_23_cast_fp16 = transpose(perm = var_1051, x = var_1047_cast_fp16)[name = string("transpose_91")];
	tensor<fp16, [1, 1, 1536]> var_1059_cast_fp16 = mul(x = x_23_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_1059_cast_fp16")];
	bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_33_cast_fp16 = concat(axis = var_1057, interleave = input_33_interleave_0, values = (x_23_cast_fp16, var_1059_cast_fp16))[name = string("input_33_cast_fp16")];
	tensor<int32, [1]> normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1054_to_fp16 = const()[name = string("op_1054_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_1054_to_fp16, x = input_33_cast_fp16)[name = string("normed_33_cast_fp16")];
	tensor<int32, [2]> var_1064_split_sizes_0 = const()[name = string("op_1064_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1064_axis_0 = const()[name = string("op_1064_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1064_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1064_cast_fp16_1 = split(axis = var_1064_axis_0, split_sizes = var_1064_split_sizes_0, x = normed_33_cast_fp16)[name = string("op_1064_cast_fp16")];
	tensor<fp16, [1536]> layers_1_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307646720)))];
	tensor<fp16, [1, 1, 1536]> attn_output_11_cast_fp16 = mul(x = var_1064_cast_fp16_0, y = layers_1_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_11_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_25_cast_fp16 = add(x = x_15_cast_fp16, y = attn_output_11_cast_fp16)[name = string("x_25_cast_fp16")];
	int32 var_1073 = const()[name = string("op_1073"), val = int32(-1)];
	fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_1075_cast_fp16 = mul(x = x_25_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_1075_cast_fp16")];
	bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_35_cast_fp16 = concat(axis = var_1073, interleave = input_35_interleave_0, values = (x_25_cast_fp16, var_1075_cast_fp16))[name = string("input_35_cast_fp16")];
	tensor<int32, [1]> normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1070_to_fp16 = const()[name = string("op_1070_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_1070_to_fp16, x = input_35_cast_fp16)[name = string("normed_37_cast_fp16")];
	tensor<int32, [2]> var_1080_split_sizes_0 = const()[name = string("op_1080_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1080_axis_0 = const()[name = string("op_1080_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1080_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1080_cast_fp16_1 = split(axis = var_1080_axis_0, split_sizes = var_1080_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_1080_cast_fp16")];
	tensor<fp16, [1536]> layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307649856)))];
	tensor<fp16, [1, 1, 1536]> h_9_cast_fp16 = mul(x = var_1080_cast_fp16_0, y = layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_9_cast_fp16")];
	tensor<int32, [3]> var_1091 = const()[name = string("op_1091"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1092 = transpose(perm = var_1091, x = h_9_cast_fp16)[name = string("transpose_90")];
	tensor<fp16, [1, 1536, 1, 1]> input_37 = expand_dims(axes = input_37_axes_0, x = var_1092)[name = string("input_37")];
	string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_5_pad_0 = const()[name = string("gate_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_5_dilations_0 = const()[name = string("gate_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> gate_5 = conv(dilations = gate_5_dilations_0, groups = gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_37)[name = string("gate_5")];
	string up_3_pad_type_0 = const()[name = string("up_3_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_3")];
	string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")];
	tensor<fp16, [1, 12288, 1, 1]> input_39 = mul(x = gate_7, y = up_3)[name = string("input_39")];
	string mlp_out_3_pad_type_0 = const()[name = string("mlp_out_3_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_3_strides_0 = const()[name = string("mlp_out_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_3_pad_0 = const()[name = string("mlp_out_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_3_dilations_0 = const()[name = string("mlp_out_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_3_groups_0 = const()[name = string("mlp_out_3_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> mlp_out_3 = conv(dilations = mlp_out_3_dilations_0, groups = mlp_out_3_groups_0, pad = mlp_out_3_pad_0, pad_type = mlp_out_3_pad_type_0, strides = mlp_out_3_strides_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_39)[name = string("mlp_out_3")];
	tensor<int32, [1]> var_1132_axes_0 = const()[name = string("op_1132_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1132 = squeeze(axes = var_1132_axes_0, x = mlp_out_3)[name = string("op_1132")];
	tensor<int32, [3]> var_1136 = const()[name = string("op_1136"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1142 = const()[name = string("op_1142"), val = int32(-1)];
	fp16 const_13_promoted = const()[name = string("const_13_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_27 = transpose(perm = var_1136, x = var_1132)[name = string("transpose_89")];
	tensor<fp16, [1, 1, 1536]> var_1144 = mul(x = x_27, y = const_13_promoted)[name = string("op_1144")];
	bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_41 = concat(axis = var_1142, interleave = input_41_interleave_0, values = (x_27, var_1144))[name = string("input_41")];
	tensor<int32, [1]> normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1139_to_fp16 = const()[name = string("op_1139_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_1139_to_fp16, x = input_41)[name = string("normed_41_cast_fp16")];
	tensor<int32, [2]> var_1149_split_sizes_0 = const()[name = string("op_1149_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1149_axis_0 = const()[name = string("op_1149_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1149_0, tensor<fp16, [1, 1, 1536]> var_1149_1 = split(axis = var_1149_axis_0, split_sizes = var_1149_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_1149")];
	tensor<fp16, [1, 1, 1536]> hidden_states_13 = mul(x = var_1149_0, y = layers_1_post_feedforward_layernorm_weight)[name = string("hidden_states_13")];
	tensor<fp16, [1, 1, 1536]> hidden_states_15_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_13)[name = string("hidden_states_15_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_3_begin_0 = const()[name = string("per_layer_slice_3_begin_0"), val = tensor<int32, [3]>([0, 0, 4096])];
	tensor<int32, [3]> per_layer_slice_3_end_0 = const()[name = string("per_layer_slice_3_end_0"), val = tensor<int32, [3]>([1, 1, 4352])];
	tensor<bool, [3]> per_layer_slice_3_end_mask_0 = const()[name = string("per_layer_slice_3_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> per_layer_slice_3_cast_fp16 = slice_by_index(begin = per_layer_slice_3_begin_0, end = per_layer_slice_3_end_0, end_mask = per_layer_slice_3_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_3_cast_fp16")];
	tensor<int32, [3]> var_1177 = const()[name = string("op_1177"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_43_axes_0 = const()[name = string("input_43_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1178 = transpose(perm = var_1177, x = hidden_states_15_cast_fp16)[name = string("transpose_88")];
	tensor<fp16, [1, 1536, 1, 1]> input_43 = expand_dims(axes = input_43_axes_0, x = var_1178)[name = string("input_43")];
	string gated_7_pad_type_0 = const()[name = string("gated_7_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_7_strides_0 = const()[name = string("gated_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_7_pad_0 = const()[name = string("gated_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_7_dilations_0 = const()[name = string("gated_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_7_groups_0 = const()[name = string("gated_7_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 1]> gated_7 = conv(dilations = gated_7_dilations_0, groups = gated_7_groups_0, pad = gated_7_pad_0, pad_type = gated_7_pad_type_0, strides = gated_7_strides_0, weight = layers_1_per_layer_input_gate_weight_palettized, x = input_43)[name = string("gated_7")];
	string gated_9_mode_0 = const()[name = string("gated_9_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 1]> gated_9 = gelu(mode = gated_9_mode_0, x = gated_7)[name = string("gated_9")];
	tensor<int32, [3]> var_1197 = const()[name = string("op_1197"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_3_axes_0 = const()[name = string("per_layer_slice_conv_3_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 1]> var_1198_cast_fp16 = transpose(perm = var_1197, x = per_layer_slice_3_cast_fp16)[name = string("transpose_87")];
	tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_3_cast_fp16 = expand_dims(axes = per_layer_slice_conv_3_axes_0, x = var_1198_cast_fp16)[name = string("per_layer_slice_conv_3_cast_fp16")];
	tensor<fp16, [1, 256, 1, 1]> input_45_cast_fp16 = mul(x = gated_9, y = per_layer_slice_conv_3_cast_fp16)[name = string("input_45_cast_fp16")];
	string gated_11_pad_type_0 = const()[name = string("gated_11_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_11_strides_0 = const()[name = string("gated_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_11_pad_0 = const()[name = string("gated_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_11_dilations_0 = const()[name = string("gated_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_11_groups_0 = const()[name = string("gated_11_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_1_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307652992))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307849664))))[name = string("layers_1_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 1]> gated_11_cast_fp16 = conv(dilations = gated_11_dilations_0, groups = gated_11_groups_0, pad = gated_11_pad_0, pad_type = gated_11_pad_type_0, strides = gated_11_strides_0, weight = layers_1_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_45_cast_fp16)[name = string("gated_11_cast_fp16")];
	tensor<int32, [1]> var_1214_axes_0 = const()[name = string("op_1214_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1214_cast_fp16 = squeeze(axes = var_1214_axes_0, x = gated_11_cast_fp16)[name = string("op_1214_cast_fp16")];
	tensor<int32, [3]> var_1218 = const()[name = string("op_1218"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1224 = const()[name = string("op_1224"), val = int32(-1)];
	fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_29_cast_fp16 = transpose(perm = var_1218, x = var_1214_cast_fp16)[name = string("transpose_86")];
	tensor<fp16, [1, 1, 1536]> var_1226_cast_fp16 = mul(x = x_29_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1226_cast_fp16")];
	bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_47_cast_fp16 = concat(axis = var_1224, interleave = input_47_interleave_0, values = (x_29_cast_fp16, var_1226_cast_fp16))[name = string("input_47_cast_fp16")];
	tensor<int32, [1]> normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1221_to_fp16 = const()[name = string("op_1221_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_1221_to_fp16, x = input_47_cast_fp16)[name = string("normed_45_cast_fp16")];
	tensor<int32, [2]> var_1231_split_sizes_0 = const()[name = string("op_1231_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1231_axis_0 = const()[name = string("op_1231_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1231_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1231_cast_fp16_1 = split(axis = var_1231_axis_0, split_sizes = var_1231_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_1231_cast_fp16")];
	tensor<fp16, [1536]> layers_1_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307851264)))];
	tensor<fp16, [1, 1, 1536]> hidden_states_19_cast_fp16 = mul(x = var_1231_cast_fp16_0, y = layers_1_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_19_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_21_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
	tensor<fp16, [1]> const_15_promoted_to_fp16 = const()[name = string("const_15_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.2cp-1])];
	tensor<fp16, [1, 1, 1536]> x_31_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_15_promoted_to_fp16)[name = string("x_31_cast_fp16")];
	int32 var_1246 = const()[name = string("op_1246"), val = int32(-1)];
	fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_1248_cast_fp16 = mul(x = x_31_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1248_cast_fp16")];
	bool input_49_interleave_0 = const()[name = string("input_49_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_49_cast_fp16 = concat(axis = var_1246, interleave = input_49_interleave_0, values = (x_31_cast_fp16, var_1248_cast_fp16))[name = string("input_49_cast_fp16")];
	tensor<int32, [1]> normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1243_to_fp16 = const()[name = string("op_1243_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_1243_to_fp16, x = input_49_cast_fp16)[name = string("normed_49_cast_fp16")];
	tensor<int32, [2]> var_1253_split_sizes_0 = const()[name = string("op_1253_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1253_axis_0 = const()[name = string("op_1253_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1253_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1253_cast_fp16_1 = split(axis = var_1253_axis_0, split_sizes = var_1253_split_sizes_0, x = normed_49_cast_fp16)[name = string("op_1253_cast_fp16")];
	tensor<fp16, [1536]> layers_2_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307854400)))];
	tensor<fp16, [1, 1, 1536]> h_13_cast_fp16 = mul(x = var_1253_cast_fp16_0, y = layers_2_input_layernorm_weight_promoted_to_fp16)[name = string("h_13_cast_fp16")];
	tensor<int32, [3]> var_1259 = const()[name = string("op_1259"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_1262_axes_0 = const()[name = string("op_1262_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1260_cast_fp16 = transpose(perm = var_1259, x = h_13_cast_fp16)[name = string("transpose_85")];
	tensor<fp16, [1, 1536, 1, 1]> var_1262_cast_fp16 = expand_dims(axes = var_1262_axes_0, x = var_1260_cast_fp16)[name = string("op_1262_cast_fp16")];
	string var_1278_pad_type_0 = const()[name = string("op_1278_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1278_strides_0 = const()[name = string("op_1278_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1278_pad_0 = const()[name = string("op_1278_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1278_dilations_0 = const()[name = string("op_1278_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1278_groups_0 = const()[name = string("op_1278_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_1278 = conv(dilations = var_1278_dilations_0, groups = var_1278_groups_0, pad = var_1278_pad_0, pad_type = var_1278_pad_type_0, strides = var_1278_strides_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = var_1262_cast_fp16)[name = string("op_1278")];
	tensor<int32, [4]> var_1283 = const()[name = string("op_1283"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_1284 = reshape(shape = var_1283, x = var_1278)[name = string("op_1284")];
	tensor<int32, [4]> var_1289 = const()[name = string("op_1289"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_1299 = const()[name = string("op_1299"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_1290 = transpose(perm = var_1289, x = var_1284)[name = string("transpose_84")];
	tensor<fp16, [1, 8, 256]> x_33 = reshape(shape = var_1299, x = var_1290)[name = string("x_33")];
	int32 var_1305 = const()[name = string("op_1305"), val = int32(-1)];
	fp16 const_17_promoted = const()[name = string("const_17_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_1307 = mul(x = x_33, y = const_17_promoted)[name = string("op_1307")];
	bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_53 = concat(axis = var_1305, interleave = input_53_interleave_0, values = (x_33, var_1307))[name = string("input_53")];
	tensor<int32, [1]> normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1302_to_fp16 = const()[name = string("op_1302_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_1302_to_fp16, x = input_53)[name = string("normed_53_cast_fp16")];
	tensor<int32, [2]> var_1312_split_sizes_0 = const()[name = string("op_1312_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_1312_axis_0 = const()[name = string("op_1312_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_1312_0, tensor<fp16, [1, 8, 256]> var_1312_1 = split(axis = var_1312_axis_0, split_sizes = var_1312_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_1312")];
	tensor<fp16, [1, 8, 256]> var_1314 = mul(x = var_1312_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_1314")];
	tensor<int32, [4]> var_1319 = const()[name = string("op_1319"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_15 = reshape(shape = var_1319, x = var_1314)[name = string("q_15")];
	tensor<fp16, [1, 8, 1, 256]> var_1321_cast_fp16 = mul(x = q_15, y = cos_s)[name = string("op_1321_cast_fp16")];
	tensor<int32, [2]> var_1322_split_sizes_0 = const()[name = string("op_1322_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_1322_axis_0 = const()[name = string("op_1322_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_1322_0, tensor<fp16, [1, 8, 1, 128]> var_1322_1 = split(axis = var_1322_axis_0, split_sizes = var_1322_split_sizes_0, x = q_15)[name = string("op_1322")];
	fp16 const_18_promoted = const()[name = string("const_18_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_1324 = mul(x = var_1322_1, y = const_18_promoted)[name = string("op_1324")];
	int32 var_1326 = const()[name = string("op_1326"), val = int32(-1)];
	bool var_1327_interleave_0 = const()[name = string("op_1327_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_1327 = concat(axis = var_1326, interleave = var_1327_interleave_0, values = (var_1324, var_1322_0))[name = string("op_1327")];
	tensor<fp16, [1, 8, 1, 256]> var_1328_cast_fp16 = mul(x = var_1327, y = sin_s)[name = string("op_1328_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_17_cast_fp16 = add(x = var_1321_cast_fp16, y = var_1328_cast_fp16)[name = string("q_17_cast_fp16")];
	bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)];
	bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = q_17_cast_fp16, y = transpose_40_cast_fp16)[name = string("attn_weights_9_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> x_35_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask_sliding)[name = string("x_35_cast_fp16")];
	tensor<int32, [1]> reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> reduce_max_2 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_35_cast_fp16)[name = string("reduce_max_2")];
	tensor<fp16, [1, 8, 1, 512]> var_1360 = sub(x = x_35_cast_fp16, y = reduce_max_2)[name = string("op_1360")];
	tensor<fp16, [1, 8, 1, 512]> var_1366 = exp(x = var_1360)[name = string("op_1366")];
	tensor<int32, [1]> var_1376_axes_0 = const()[name = string("op_1376_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_1376_keep_dims_0 = const()[name = string("op_1376_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> var_1376 = reduce_sum(axes = var_1376_axes_0, keep_dims = var_1376_keep_dims_0, x = var_1366)[name = string("op_1376")];
	tensor<fp16, [1, 8, 1, 512]> var_1382_cast_fp16 = real_div(x = var_1366, y = var_1376)[name = string("op_1382_cast_fp16")];
	bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)];
	bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = var_1382_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_13_cast_fp16")];
	tensor<int32, [4]> var_1393 = const()[name = string("op_1393"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_1400 = const()[name = string("op_1400"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_1394_cast_fp16 = transpose(perm = var_1393, x = attn_output_13_cast_fp16)[name = string("transpose_83")];
	tensor<fp16, [1, 1, 2048]> attn_output_15_cast_fp16 = reshape(shape = var_1400, x = var_1394_cast_fp16)[name = string("attn_output_15_cast_fp16")];
	tensor<int32, [3]> var_1405 = const()[name = string("op_1405"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_1421_pad_type_0 = const()[name = string("op_1421_pad_type_0"), val = string("valid")];
	int32 var_1421_groups_0 = const()[name = string("op_1421_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_1421_strides_0 = const()[name = string("op_1421_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_1421_pad_0 = const()[name = string("op_1421_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_1421_dilations_0 = const()[name = string("op_1421_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307857536))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309430464))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 2048, 1]> var_1406_cast_fp16 = transpose(perm = var_1405, x = attn_output_15_cast_fp16)[name = string("transpose_82")];
	tensor<fp16, [1, 1536, 1]> var_1421_cast_fp16 = conv(dilations = var_1421_dilations_0, groups = var_1421_groups_0, pad = var_1421_pad_0, pad_type = var_1421_pad_type_0, strides = var_1421_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_1406_cast_fp16)[name = string("op_1421_cast_fp16")];
	tensor<int32, [3]> var_1425 = const()[name = string("op_1425"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1431 = const()[name = string("op_1431"), val = int32(-1)];
	fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_39_cast_fp16 = transpose(perm = var_1425, x = var_1421_cast_fp16)[name = string("transpose_81")];
	tensor<fp16, [1, 1, 1536]> var_1433_cast_fp16 = mul(x = x_39_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1433_cast_fp16")];
	bool input_57_interleave_0 = const()[name = string("input_57_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_57_cast_fp16 = concat(axis = var_1431, interleave = input_57_interleave_0, values = (x_39_cast_fp16, var_1433_cast_fp16))[name = string("input_57_cast_fp16")];
	tensor<int32, [1]> normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1428_to_fp16 = const()[name = string("op_1428_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_1428_to_fp16, x = input_57_cast_fp16)[name = string("normed_57_cast_fp16")];
	tensor<int32, [2]> var_1438_split_sizes_0 = const()[name = string("op_1438_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1438_axis_0 = const()[name = string("op_1438_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1438_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1438_cast_fp16_1 = split(axis = var_1438_axis_0, split_sizes = var_1438_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_1438_cast_fp16")];
	tensor<fp16, [1536]> layers_2_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309432064)))];
	tensor<fp16, [1, 1, 1536]> attn_output_17_cast_fp16 = mul(x = var_1438_cast_fp16_0, y = layers_2_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_17_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_41_cast_fp16 = add(x = x_31_cast_fp16, y = attn_output_17_cast_fp16)[name = string("x_41_cast_fp16")];
	int32 var_1447 = const()[name = string("op_1447"), val = int32(-1)];
	fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_1449_cast_fp16 = mul(x = x_41_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1449_cast_fp16")];
	bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_59_cast_fp16 = concat(axis = var_1447, interleave = input_59_interleave_0, values = (x_41_cast_fp16, var_1449_cast_fp16))[name = string("input_59_cast_fp16")];
	tensor<int32, [1]> normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1444_to_fp16 = const()[name = string("op_1444_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_1444_to_fp16, x = input_59_cast_fp16)[name = string("normed_61_cast_fp16")];
	tensor<int32, [2]> var_1454_split_sizes_0 = const()[name = string("op_1454_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1454_axis_0 = const()[name = string("op_1454_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1454_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1454_cast_fp16_1 = split(axis = var_1454_axis_0, split_sizes = var_1454_split_sizes_0, x = normed_61_cast_fp16)[name = string("op_1454_cast_fp16")];
	tensor<fp16, [1536]> layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309435200)))];
	tensor<fp16, [1, 1, 1536]> h_15_cast_fp16 = mul(x = var_1454_cast_fp16_0, y = layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_15_cast_fp16")];
	tensor<int32, [3]> var_1465 = const()[name = string("op_1465"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_61_axes_0 = const()[name = string("input_61_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1466 = transpose(perm = var_1465, x = h_15_cast_fp16)[name = string("transpose_80")];
	tensor<fp16, [1, 1536, 1, 1]> input_61 = expand_dims(axes = input_61_axes_0, x = var_1466)[name = string("input_61")];
	string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_9_dilations_0 = const()[name = string("gate_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_9_groups_0 = const()[name = string("gate_9_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_61)[name = string("gate_9")];
	string up_5_pad_type_0 = const()[name = string("up_5_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_61)[name = string("up_5")];
	string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")];
	tensor<fp16, [1, 12288, 1, 1]> input_63 = mul(x = gate_11, y = up_5)[name = string("input_63")];
	string mlp_out_5_pad_type_0 = const()[name = string("mlp_out_5_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_5_strides_0 = const()[name = string("mlp_out_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_5_pad_0 = const()[name = string("mlp_out_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_5_dilations_0 = const()[name = string("mlp_out_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_5_groups_0 = const()[name = string("mlp_out_5_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> mlp_out_5 = conv(dilations = mlp_out_5_dilations_0, groups = mlp_out_5_groups_0, pad = mlp_out_5_pad_0, pad_type = mlp_out_5_pad_type_0, strides = mlp_out_5_strides_0, weight = layers_2_mlp_down_proj_weight_palettized, x = input_63)[name = string("mlp_out_5")];
	tensor<int32, [1]> var_1506_axes_0 = const()[name = string("op_1506_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1506 = squeeze(axes = var_1506_axes_0, x = mlp_out_5)[name = string("op_1506")];
	tensor<int32, [3]> var_1510 = const()[name = string("op_1510"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1516 = const()[name = string("op_1516"), val = int32(-1)];
	fp16 const_21_promoted = const()[name = string("const_21_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_43 = transpose(perm = var_1510, x = var_1506)[name = string("transpose_79")];
	tensor<fp16, [1, 1, 1536]> var_1518 = mul(x = x_43, y = const_21_promoted)[name = string("op_1518")];
	bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_65 = concat(axis = var_1516, interleave = input_65_interleave_0, values = (x_43, var_1518))[name = string("input_65")];
	tensor<int32, [1]> normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1513_to_fp16 = const()[name = string("op_1513_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_1513_to_fp16, x = input_65)[name = string("normed_65_cast_fp16")];
	tensor<int32, [2]> var_1523_split_sizes_0 = const()[name = string("op_1523_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1523_axis_0 = const()[name = string("op_1523_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1523_0, tensor<fp16, [1, 1, 1536]> var_1523_1 = split(axis = var_1523_axis_0, split_sizes = var_1523_split_sizes_0, x = normed_65_cast_fp16)[name = string("op_1523")];
	tensor<fp16, [1, 1, 1536]> hidden_states_23 = mul(x = var_1523_0, y = layers_2_post_feedforward_layernorm_weight)[name = string("hidden_states_23")];
	tensor<fp16, [1, 1, 1536]> hidden_states_25_cast_fp16 = add(x = x_41_cast_fp16, y = hidden_states_23)[name = string("hidden_states_25_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_5_begin_0 = const()[name = string("per_layer_slice_5_begin_0"), val = tensor<int32, [3]>([0, 0, 4352])];
	tensor<int32, [3]> per_layer_slice_5_end_0 = const()[name = string("per_layer_slice_5_end_0"), val = tensor<int32, [3]>([1, 1, 4608])];
	tensor<bool, [3]> per_layer_slice_5_end_mask_0 = const()[name = string("per_layer_slice_5_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> per_layer_slice_5_cast_fp16 = slice_by_index(begin = per_layer_slice_5_begin_0, end = per_layer_slice_5_end_0, end_mask = per_layer_slice_5_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_5_cast_fp16")];
	tensor<int32, [3]> var_1551 = const()[name = string("op_1551"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1552 = transpose(perm = var_1551, x = hidden_states_25_cast_fp16)[name = string("transpose_78")];
	tensor<fp16, [1, 1536, 1, 1]> input_67 = expand_dims(axes = input_67_axes_0, x = var_1552)[name = string("input_67")];
	string gated_13_pad_type_0 = const()[name = string("gated_13_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_13_strides_0 = const()[name = string("gated_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_13_pad_0 = const()[name = string("gated_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_13_dilations_0 = const()[name = string("gated_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_13_groups_0 = const()[name = string("gated_13_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 1]> gated_13 = conv(dilations = gated_13_dilations_0, groups = gated_13_groups_0, pad = gated_13_pad_0, pad_type = gated_13_pad_type_0, strides = gated_13_strides_0, weight = layers_2_per_layer_input_gate_weight_palettized, x = input_67)[name = string("gated_13")];
	string gated_15_mode_0 = const()[name = string("gated_15_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 1]> gated_15 = gelu(mode = gated_15_mode_0, x = gated_13)[name = string("gated_15")];
	tensor<int32, [3]> var_1571 = const()[name = string("op_1571"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_5_axes_0 = const()[name = string("per_layer_slice_conv_5_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 1]> var_1572_cast_fp16 = transpose(perm = var_1571, x = per_layer_slice_5_cast_fp16)[name = string("transpose_77")];
	tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_5_cast_fp16 = expand_dims(axes = per_layer_slice_conv_5_axes_0, x = var_1572_cast_fp16)[name = string("per_layer_slice_conv_5_cast_fp16")];
	tensor<fp16, [1, 256, 1, 1]> input_69_cast_fp16 = mul(x = gated_15, y = per_layer_slice_conv_5_cast_fp16)[name = string("input_69_cast_fp16")];
	string gated_17_pad_type_0 = const()[name = string("gated_17_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_17_strides_0 = const()[name = string("gated_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_17_pad_0 = const()[name = string("gated_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_17_dilations_0 = const()[name = string("gated_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_17_groups_0 = const()[name = string("gated_17_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_2_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309438336))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309635008))))[name = string("layers_2_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 1]> gated_17_cast_fp16 = conv(dilations = gated_17_dilations_0, groups = gated_17_groups_0, pad = gated_17_pad_0, pad_type = gated_17_pad_type_0, strides = gated_17_strides_0, weight = layers_2_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_69_cast_fp16)[name = string("gated_17_cast_fp16")];
	tensor<int32, [1]> var_1588_axes_0 = const()[name = string("op_1588_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1588_cast_fp16 = squeeze(axes = var_1588_axes_0, x = gated_17_cast_fp16)[name = string("op_1588_cast_fp16")];
	tensor<int32, [3]> var_1592 = const()[name = string("op_1592"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1598 = const()[name = string("op_1598"), val = int32(-1)];
	fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_45_cast_fp16 = transpose(perm = var_1592, x = var_1588_cast_fp16)[name = string("transpose_76")];
	tensor<fp16, [1, 1, 1536]> var_1600_cast_fp16 = mul(x = x_45_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_1600_cast_fp16")];
	bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_71_cast_fp16 = concat(axis = var_1598, interleave = input_71_interleave_0, values = (x_45_cast_fp16, var_1600_cast_fp16))[name = string("input_71_cast_fp16")];
	tensor<int32, [1]> normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1595_to_fp16 = const()[name = string("op_1595_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_1595_to_fp16, x = input_71_cast_fp16)[name = string("normed_69_cast_fp16")];
	tensor<int32, [2]> var_1605_split_sizes_0 = const()[name = string("op_1605_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1605_axis_0 = const()[name = string("op_1605_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1605_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1605_cast_fp16_1 = split(axis = var_1605_axis_0, split_sizes = var_1605_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_1605_cast_fp16")];
	tensor<fp16, [1536]> layers_2_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309636608)))];
	tensor<fp16, [1, 1, 1536]> hidden_states_29_cast_fp16 = mul(x = var_1605_cast_fp16_0, y = layers_2_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_29_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_31_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("hidden_states_31_cast_fp16")];
	tensor<fp16, [1]> const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.5p-1])];
	tensor<fp16, [1, 1, 1536]> x_47_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_23_promoted_to_fp16)[name = string("x_47_cast_fp16")];
	int32 var_1620 = const()[name = string("op_1620"), val = int32(-1)];
	fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_1622_cast_fp16 = mul(x = x_47_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_1622_cast_fp16")];
	bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_73_cast_fp16 = concat(axis = var_1620, interleave = input_73_interleave_0, values = (x_47_cast_fp16, var_1622_cast_fp16))[name = string("input_73_cast_fp16")];
	tensor<int32, [1]> normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1617_to_fp16 = const()[name = string("op_1617_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_1617_to_fp16, x = input_73_cast_fp16)[name = string("normed_73_cast_fp16")];
	tensor<int32, [2]> var_1627_split_sizes_0 = const()[name = string("op_1627_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1627_axis_0 = const()[name = string("op_1627_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1627_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1627_cast_fp16_1 = split(axis = var_1627_axis_0, split_sizes = var_1627_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_1627_cast_fp16")];
	tensor<fp16, [1536]> layers_3_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309639744)))];
	tensor<fp16, [1, 1, 1536]> h_19_cast_fp16 = mul(x = var_1627_cast_fp16_0, y = layers_3_input_layernorm_weight_promoted_to_fp16)[name = string("h_19_cast_fp16")];
	tensor<int32, [3]> var_1633 = const()[name = string("op_1633"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_1636_axes_0 = const()[name = string("op_1636_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1634_cast_fp16 = transpose(perm = var_1633, x = h_19_cast_fp16)[name = string("transpose_75")];
	tensor<fp16, [1, 1536, 1, 1]> var_1636_cast_fp16 = expand_dims(axes = var_1636_axes_0, x = var_1634_cast_fp16)[name = string("op_1636_cast_fp16")];
	string var_1652_pad_type_0 = const()[name = string("op_1652_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1652_strides_0 = const()[name = string("op_1652_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1652_pad_0 = const()[name = string("op_1652_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1652_dilations_0 = const()[name = string("op_1652_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1652_groups_0 = const()[name = string("op_1652_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_1652 = conv(dilations = var_1652_dilations_0, groups = var_1652_groups_0, pad = var_1652_pad_0, pad_type = var_1652_pad_type_0, strides = var_1652_strides_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = var_1636_cast_fp16)[name = string("op_1652")];
	tensor<int32, [4]> var_1657 = const()[name = string("op_1657"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_1658 = reshape(shape = var_1657, x = var_1652)[name = string("op_1658")];
	tensor<int32, [4]> var_1663 = const()[name = string("op_1663"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_1673 = const()[name = string("op_1673"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_1664 = transpose(perm = var_1663, x = var_1658)[name = string("transpose_74")];
	tensor<fp16, [1, 8, 256]> x_49 = reshape(shape = var_1673, x = var_1664)[name = string("x_49")];
	int32 var_1679 = const()[name = string("op_1679"), val = int32(-1)];
	fp16 const_25_promoted = const()[name = string("const_25_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_1681 = mul(x = x_49, y = const_25_promoted)[name = string("op_1681")];
	bool input_77_interleave_0 = const()[name = string("input_77_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_77 = concat(axis = var_1679, interleave = input_77_interleave_0, values = (x_49, var_1681))[name = string("input_77")];
	tensor<int32, [1]> normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1676_to_fp16 = const()[name = string("op_1676_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_1676_to_fp16, x = input_77)[name = string("normed_77_cast_fp16")];
	tensor<int32, [2]> var_1686_split_sizes_0 = const()[name = string("op_1686_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_1686_axis_0 = const()[name = string("op_1686_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_1686_0, tensor<fp16, [1, 8, 256]> var_1686_1 = split(axis = var_1686_axis_0, split_sizes = var_1686_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_1686")];
	tensor<fp16, [1, 8, 256]> var_1688 = mul(x = var_1686_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_1688")];
	tensor<int32, [4]> var_1693 = const()[name = string("op_1693"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_21 = reshape(shape = var_1693, x = var_1688)[name = string("q_21")];
	tensor<fp16, [1, 8, 1, 256]> var_1695_cast_fp16 = mul(x = q_21, y = cos_s)[name = string("op_1695_cast_fp16")];
	tensor<int32, [2]> var_1696_split_sizes_0 = const()[name = string("op_1696_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_1696_axis_0 = const()[name = string("op_1696_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_1696_0, tensor<fp16, [1, 8, 1, 128]> var_1696_1 = split(axis = var_1696_axis_0, split_sizes = var_1696_split_sizes_0, x = q_21)[name = string("op_1696")];
	fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_1698 = mul(x = var_1696_1, y = const_26_promoted)[name = string("op_1698")];
	int32 var_1700 = const()[name = string("op_1700"), val = int32(-1)];
	bool var_1701_interleave_0 = const()[name = string("op_1701_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_1701 = concat(axis = var_1700, interleave = var_1701_interleave_0, values = (var_1698, var_1696_0))[name = string("op_1701")];
	tensor<fp16, [1, 8, 1, 256]> var_1702_cast_fp16 = mul(x = var_1701, y = sin_s)[name = string("op_1702_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_23_cast_fp16 = add(x = var_1695_cast_fp16, y = var_1702_cast_fp16)[name = string("q_23_cast_fp16")];
	bool attn_weights_13_transpose_x_0 = const()[name = string("attn_weights_13_transpose_x_0"), val = bool(false)];
	bool attn_weights_13_transpose_y_0 = const()[name = string("attn_weights_13_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> attn_weights_13_cast_fp16 = matmul(transpose_x = attn_weights_13_transpose_x_0, transpose_y = attn_weights_13_transpose_y_0, x = q_23_cast_fp16, y = transpose_40_cast_fp16)[name = string("attn_weights_13_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> x_51_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask_sliding)[name = string("x_51_cast_fp16")];
	tensor<int32, [1]> reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> reduce_max_3 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_51_cast_fp16)[name = string("reduce_max_3")];
	tensor<fp16, [1, 8, 1, 512]> var_1734 = sub(x = x_51_cast_fp16, y = reduce_max_3)[name = string("op_1734")];
	tensor<fp16, [1, 8, 1, 512]> var_1740 = exp(x = var_1734)[name = string("op_1740")];
	tensor<int32, [1]> var_1750_axes_0 = const()[name = string("op_1750_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_1750_keep_dims_0 = const()[name = string("op_1750_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> var_1750 = reduce_sum(axes = var_1750_axes_0, keep_dims = var_1750_keep_dims_0, x = var_1740)[name = string("op_1750")];
	tensor<fp16, [1, 8, 1, 512]> var_1756_cast_fp16 = real_div(x = var_1740, y = var_1750)[name = string("op_1756_cast_fp16")];
	bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)];
	bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = var_1756_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_19_cast_fp16")];
	tensor<int32, [4]> var_1767 = const()[name = string("op_1767"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_1774 = const()[name = string("op_1774"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_1768_cast_fp16 = transpose(perm = var_1767, x = attn_output_19_cast_fp16)[name = string("transpose_73")];
	tensor<fp16, [1, 1, 2048]> attn_output_21_cast_fp16 = reshape(shape = var_1774, x = var_1768_cast_fp16)[name = string("attn_output_21_cast_fp16")];
	tensor<int32, [3]> var_1779 = const()[name = string("op_1779"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_1795_pad_type_0 = const()[name = string("op_1795_pad_type_0"), val = string("valid")];
	int32 var_1795_groups_0 = const()[name = string("op_1795_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_1795_strides_0 = const()[name = string("op_1795_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_1795_pad_0 = const()[name = string("op_1795_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_1795_dilations_0 = const()[name = string("op_1795_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309642880))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311215808))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 2048, 1]> var_1780_cast_fp16 = transpose(perm = var_1779, x = attn_output_21_cast_fp16)[name = string("transpose_72")];
	tensor<fp16, [1, 1536, 1]> var_1795_cast_fp16 = conv(dilations = var_1795_dilations_0, groups = var_1795_groups_0, pad = var_1795_pad_0, pad_type = var_1795_pad_type_0, strides = var_1795_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_1780_cast_fp16)[name = string("op_1795_cast_fp16")];
	tensor<int32, [3]> var_1799 = const()[name = string("op_1799"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1805 = const()[name = string("op_1805"), val = int32(-1)];
	fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_55_cast_fp16 = transpose(perm = var_1799, x = var_1795_cast_fp16)[name = string("transpose_71")];
	tensor<fp16, [1, 1, 1536]> var_1807_cast_fp16 = mul(x = x_55_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_1807_cast_fp16")];
	bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_81_cast_fp16 = concat(axis = var_1805, interleave = input_81_interleave_0, values = (x_55_cast_fp16, var_1807_cast_fp16))[name = string("input_81_cast_fp16")];
	tensor<int32, [1]> normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1802_to_fp16 = const()[name = string("op_1802_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_1802_to_fp16, x = input_81_cast_fp16)[name = string("normed_81_cast_fp16")];
	tensor<int32, [2]> var_1812_split_sizes_0 = const()[name = string("op_1812_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1812_axis_0 = const()[name = string("op_1812_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1812_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1812_cast_fp16_1 = split(axis = var_1812_axis_0, split_sizes = var_1812_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_1812_cast_fp16")];
	tensor<fp16, [1536]> layers_3_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311217408)))];
	tensor<fp16, [1, 1, 1536]> attn_output_23_cast_fp16 = mul(x = var_1812_cast_fp16_0, y = layers_3_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_23_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_57_cast_fp16 = add(x = x_47_cast_fp16, y = attn_output_23_cast_fp16)[name = string("x_57_cast_fp16")];
	int32 var_1821 = const()[name = string("op_1821"), val = int32(-1)];
	fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_1823_cast_fp16 = mul(x = x_57_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_1823_cast_fp16")];
	bool input_83_interleave_0 = const()[name = string("input_83_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_83_cast_fp16 = concat(axis = var_1821, interleave = input_83_interleave_0, values = (x_57_cast_fp16, var_1823_cast_fp16))[name = string("input_83_cast_fp16")];
	tensor<int32, [1]> normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1818_to_fp16 = const()[name = string("op_1818_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_1818_to_fp16, x = input_83_cast_fp16)[name = string("normed_85_cast_fp16")];
	tensor<int32, [2]> var_1828_split_sizes_0 = const()[name = string("op_1828_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1828_axis_0 = const()[name = string("op_1828_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1828_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1828_cast_fp16_1 = split(axis = var_1828_axis_0, split_sizes = var_1828_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_1828_cast_fp16")];
	tensor<fp16, [1536]> layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311220544)))];
	tensor<fp16, [1, 1, 1536]> h_21_cast_fp16 = mul(x = var_1828_cast_fp16_0, y = layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_21_cast_fp16")];
	tensor<int32, [3]> var_1839 = const()[name = string("op_1839"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1840 = transpose(perm = var_1839, x = h_21_cast_fp16)[name = string("transpose_70")];
	tensor<fp16, [1, 1536, 1, 1]> input_85 = expand_dims(axes = input_85_axes_0, x = var_1840)[name = string("input_85")];
	string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_13_pad_0 = const()[name = string("gate_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_13_groups_0 = const()[name = string("gate_13_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_85)[name = string("gate_13")];
	string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_7_pad_0 = const()[name = string("up_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_85)[name = string("up_7")];
	string gate_15_mode_0 = const()[name = string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")];
	tensor<fp16, [1, 12288, 1, 1]> input_87 = mul(x = gate_15, y = up_7)[name = string("input_87")];
	string mlp_out_7_pad_type_0 = const()[name = string("mlp_out_7_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_7_strides_0 = const()[name = string("mlp_out_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_7_pad_0 = const()[name = string("mlp_out_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_7_dilations_0 = const()[name = string("mlp_out_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_7_groups_0 = const()[name = string("mlp_out_7_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> mlp_out_7 = conv(dilations = mlp_out_7_dilations_0, groups = mlp_out_7_groups_0, pad = mlp_out_7_pad_0, pad_type = mlp_out_7_pad_type_0, strides = mlp_out_7_strides_0, weight = layers_3_mlp_down_proj_weight_palettized, x = input_87)[name = string("mlp_out_7")];
	tensor<int32, [1]> var_1880_axes_0 = const()[name = string("op_1880_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1880 = squeeze(axes = var_1880_axes_0, x = mlp_out_7)[name = string("op_1880")];
	tensor<int32, [3]> var_1884 = const()[name = string("op_1884"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1890 = const()[name = string("op_1890"), val = int32(-1)];
	fp16 const_29_promoted = const()[name = string("const_29_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_59 = transpose(perm = var_1884, x = var_1880)[name = string("transpose_69")];
	tensor<fp16, [1, 1, 1536]> var_1892 = mul(x = x_59, y = const_29_promoted)[name = string("op_1892")];
	bool input_89_interleave_0 = const()[name = string("input_89_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_89 = concat(axis = var_1890, interleave = input_89_interleave_0, values = (x_59, var_1892))[name = string("input_89")];
	tensor<int32, [1]> normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1887_to_fp16 = const()[name = string("op_1887_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_1887_to_fp16, x = input_89)[name = string("normed_89_cast_fp16")];
	tensor<int32, [2]> var_1897_split_sizes_0 = const()[name = string("op_1897_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1897_axis_0 = const()[name = string("op_1897_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1897_0, tensor<fp16, [1, 1, 1536]> var_1897_1 = split(axis = var_1897_axis_0, split_sizes = var_1897_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_1897")];
	tensor<fp16, [1, 1, 1536]> hidden_states_33 = mul(x = var_1897_0, y = layers_3_post_feedforward_layernorm_weight)[name = string("hidden_states_33")];
	tensor<fp16, [1, 1, 1536]> hidden_states_35_cast_fp16 = add(x = x_57_cast_fp16, y = hidden_states_33)[name = string("hidden_states_35_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_7_begin_0 = const()[name = string("per_layer_slice_7_begin_0"), val = tensor<int32, [3]>([0, 0, 4608])];
	tensor<int32, [3]> per_layer_slice_7_end_0 = const()[name = string("per_layer_slice_7_end_0"), val = tensor<int32, [3]>([1, 1, 4864])];
	tensor<bool, [3]> per_layer_slice_7_end_mask_0 = const()[name = string("per_layer_slice_7_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> per_layer_slice_7_cast_fp16 = slice_by_index(begin = per_layer_slice_7_begin_0, end = per_layer_slice_7_end_0, end_mask = per_layer_slice_7_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_7_cast_fp16")];
	tensor<int32, [3]> var_1925 = const()[name = string("op_1925"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_91_axes_0 = const()[name = string("input_91_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1926 = transpose(perm = var_1925, x = hidden_states_35_cast_fp16)[name = string("transpose_68")];
	tensor<fp16, [1, 1536, 1, 1]> input_91 = expand_dims(axes = input_91_axes_0, x = var_1926)[name = string("input_91")];
	string gated_19_pad_type_0 = const()[name = string("gated_19_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_19_strides_0 = const()[name = string("gated_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_19_pad_0 = const()[name = string("gated_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_19_dilations_0 = const()[name = string("gated_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_19_groups_0 = const()[name = string("gated_19_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 1]> gated_19 = conv(dilations = gated_19_dilations_0, groups = gated_19_groups_0, pad = gated_19_pad_0, pad_type = gated_19_pad_type_0, strides = gated_19_strides_0, weight = layers_3_per_layer_input_gate_weight_palettized, x = input_91)[name = string("gated_19")];
	string gated_21_mode_0 = const()[name = string("gated_21_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 1]> gated_21 = gelu(mode = gated_21_mode_0, x = gated_19)[name = string("gated_21")];
	tensor<int32, [3]> var_1945 = const()[name = string("op_1945"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_7_axes_0 = const()[name = string("per_layer_slice_conv_7_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 1]> var_1946_cast_fp16 = transpose(perm = var_1945, x = per_layer_slice_7_cast_fp16)[name = string("transpose_67")];
	tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_7_cast_fp16 = expand_dims(axes = per_layer_slice_conv_7_axes_0, x = var_1946_cast_fp16)[name = string("per_layer_slice_conv_7_cast_fp16")];
	tensor<fp16, [1, 256, 1, 1]> input_93_cast_fp16 = mul(x = gated_21, y = per_layer_slice_conv_7_cast_fp16)[name = string("input_93_cast_fp16")];
	string gated_23_pad_type_0 = const()[name = string("gated_23_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_23_strides_0 = const()[name = string("gated_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_23_pad_0 = const()[name = string("gated_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_23_dilations_0 = const()[name = string("gated_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_23_groups_0 = const()[name = string("gated_23_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_3_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311223680))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311420352))))[name = string("layers_3_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 1]> gated_23_cast_fp16 = conv(dilations = gated_23_dilations_0, groups = gated_23_groups_0, pad = gated_23_pad_0, pad_type = gated_23_pad_type_0, strides = gated_23_strides_0, weight = layers_3_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_93_cast_fp16)[name = string("gated_23_cast_fp16")];
	tensor<int32, [1]> var_1962_axes_0 = const()[name = string("op_1962_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1962_cast_fp16 = squeeze(axes = var_1962_axes_0, x = gated_23_cast_fp16)[name = string("op_1962_cast_fp16")];
	tensor<int32, [3]> var_1966 = const()[name = string("op_1966"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1972 = const()[name = string("op_1972"), val = int32(-1)];
	fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_61_cast_fp16 = transpose(perm = var_1966, x = var_1962_cast_fp16)[name = string("transpose_66")];
	tensor<fp16, [1, 1, 1536]> var_1974_cast_fp16 = mul(x = x_61_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_1974_cast_fp16")];
	bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_95_cast_fp16 = concat(axis = var_1972, interleave = input_95_interleave_0, values = (x_61_cast_fp16, var_1974_cast_fp16))[name = string("input_95_cast_fp16")];
	tensor<int32, [1]> normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1969_to_fp16 = const()[name = string("op_1969_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_1969_to_fp16, x = input_95_cast_fp16)[name = string("normed_93_cast_fp16")];
	tensor<int32, [2]> var_1979_split_sizes_0 = const()[name = string("op_1979_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1979_axis_0 = const()[name = string("op_1979_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1979_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1979_cast_fp16_1 = split(axis = var_1979_axis_0, split_sizes = var_1979_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_1979_cast_fp16")];
	tensor<fp16, [1536]> layers_3_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311421952)))];
	tensor<fp16, [1, 1, 1536]> hidden_states_39_cast_fp16 = mul(x = var_1979_cast_fp16_0, y = layers_3_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_39_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_41_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("hidden_states_41_cast_fp16")];
	tensor<fp16, [1]> const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.34p-1])];
	tensor<fp16, [1, 1, 1536]> x_63_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_31_promoted_to_fp16)[name = string("x_63_cast_fp16")];
	int32 var_1994 = const()[name = string("op_1994"), val = int32(-1)];
	fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_1996_cast_fp16 = mul(x = x_63_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_1996_cast_fp16")];
	bool input_97_interleave_0 = const()[name = string("input_97_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_97_cast_fp16 = concat(axis = var_1994, interleave = input_97_interleave_0, values = (x_63_cast_fp16, var_1996_cast_fp16))[name = string("input_97_cast_fp16")];
	tensor<int32, [1]> normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1991_to_fp16 = const()[name = string("op_1991_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_1991_to_fp16, x = input_97_cast_fp16)[name = string("normed_97_cast_fp16")];
	tensor<int32, [2]> var_2001_split_sizes_0 = const()[name = string("op_2001_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2001_axis_0 = const()[name = string("op_2001_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2001_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2001_cast_fp16_1 = split(axis = var_2001_axis_0, split_sizes = var_2001_split_sizes_0, x = normed_97_cast_fp16)[name = string("op_2001_cast_fp16")];
	tensor<fp16, [1536]> layers_4_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311425088)))];
	tensor<fp16, [1, 1, 1536]> h_25_cast_fp16 = mul(x = var_2001_cast_fp16_0, y = layers_4_input_layernorm_weight_promoted_to_fp16)[name = string("h_25_cast_fp16")];
	tensor<int32, [3]> var_2007 = const()[name = string("op_2007"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2010_axes_0 = const()[name = string("op_2010_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2008_cast_fp16 = transpose(perm = var_2007, x = h_25_cast_fp16)[name = string("transpose_65")];
	tensor<fp16, [1, 1536, 1, 1]> var_2010_cast_fp16 = expand_dims(axes = var_2010_axes_0, x = var_2008_cast_fp16)[name = string("op_2010_cast_fp16")];
	string var_2026_pad_type_0 = const()[name = string("op_2026_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2026_strides_0 = const()[name = string("op_2026_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2026_pad_0 = const()[name = string("op_2026_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2026_dilations_0 = const()[name = string("op_2026_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2026_groups_0 = const()[name = string("op_2026_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4096, 1, 1]> var_2026 = conv(dilations = var_2026_dilations_0, groups = var_2026_groups_0, pad = var_2026_pad_0, pad_type = var_2026_pad_type_0, strides = var_2026_strides_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = var_2010_cast_fp16)[name = string("op_2026")];
	tensor<int32, [4]> var_2031 = const()[name = string("op_2031"), val = tensor<int32, [4]>([1, 8, 512, 1])];
	tensor<fp16, [1, 8, 512, 1]> var_2032 = reshape(shape = var_2031, x = var_2026)[name = string("op_2032")];
	tensor<int32, [4]> var_2037 = const()[name = string("op_2037"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_2047 = const()[name = string("op_2047"), val = tensor<int32, [3]>([1, 8, 512])];
	tensor<fp16, [1, 8, 1, 512]> var_2038 = transpose(perm = var_2037, x = var_2032)[name = string("transpose_64")];
	tensor<fp16, [1, 8, 512]> x_65 = reshape(shape = var_2047, x = var_2038)[name = string("x_65")];
	int32 var_2053 = const()[name = string("op_2053"), val = int32(-1)];
	fp16 const_33_promoted = const()[name = string("const_33_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 512]> var_2055 = mul(x = x_65, y = const_33_promoted)[name = string("op_2055")];
	bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1024]> input_101 = concat(axis = var_2053, interleave = input_101_interleave_0, values = (x_65, var_2055))[name = string("input_101")];
	tensor<int32, [1]> normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2050_to_fp16 = const()[name = string("op_2050_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 1024]> normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_2050_to_fp16, x = input_101)[name = string("normed_101_cast_fp16")];
	tensor<int32, [2]> var_2060_split_sizes_0 = const()[name = string("op_2060_split_sizes_0"), val = tensor<int32, [2]>([512, 512])];
	int32 var_2060_axis_0 = const()[name = string("op_2060_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 512]> var_2060_0, tensor<fp16, [1, 8, 512]> var_2060_1 = split(axis = var_2060_axis_0, split_sizes = var_2060_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_2060")];
	tensor<fp16, [1, 8, 512]> var_2062 = mul(x = var_2060_0, y = layers_4_self_attn_q_norm_weight)[name = string("op_2062")];
	tensor<int32, [4]> var_2067 = const()[name = string("op_2067"), val = tensor<int32, [4]>([1, 8, 1, 512])];
	tensor<fp16, [1, 8, 1, 512]> q_27 = reshape(shape = var_2067, x = var_2062)[name = string("q_27")];
	tensor<fp16, [1, 8, 1, 512]> var_2069_cast_fp16 = mul(x = q_27, y = cos_f)[name = string("op_2069_cast_fp16")];
	tensor<int32, [2]> var_2070_split_sizes_0 = const()[name = string("op_2070_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_2070_axis_0 = const()[name = string("op_2070_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 256]> var_2070_0, tensor<fp16, [1, 8, 1, 256]> var_2070_1 = split(axis = var_2070_axis_0, split_sizes = var_2070_split_sizes_0, x = q_27)[name = string("op_2070")];
	fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 256]> var_2072 = mul(x = var_2070_1, y = const_34_promoted)[name = string("op_2072")];
	int32 var_2074 = const()[name = string("op_2074"), val = int32(-1)];
	bool var_2075_interleave_0 = const()[name = string("op_2075_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> var_2075 = concat(axis = var_2074, interleave = var_2075_interleave_0, values = (var_2072, var_2070_0))[name = string("op_2075")];
	tensor<fp16, [1, 8, 1, 512]> var_2076_cast_fp16 = mul(x = var_2075, y = sin_f)[name = string("op_2076_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> q_29_cast_fp16 = add(x = var_2069_cast_fp16, y = var_2076_cast_fp16)[name = string("q_29_cast_fp16")];
	tensor<int32, [4]> transpose_16_perm_0 = const()[name = string("transpose_16_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_8_reps_0 = const()[name = string("tile_8_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])];
	tensor<fp16, [1, 1, 2048, 512]> transpose_16_cast_fp16 = transpose(perm = transpose_16_perm_0, x = kv14_k)[name = string("transpose_63")];
	tensor<fp16, [8, 1, 2048, 512]> tile_8_cast_fp16 = tile(reps = tile_8_reps_0, x = transpose_16_cast_fp16)[name = string("tile_8_cast_fp16")];
	tensor<int32, [5]> concat_16 = const()[name = string("concat_16"), val = tensor<int32, [5]>([8, 1, 1, 2048, 512])];
	tensor<fp16, [8, 1, 1, 2048, 512]> reshape_16_cast_fp16 = reshape(shape = concat_16, x = tile_8_cast_fp16)[name = string("reshape_16_cast_fp16")];
	tensor<int32, [5]> transpose_17_perm_0 = const()[name = string("transpose_17_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [4]>([-1, 1, 2048, 512])];
	tensor<fp16, [1, 8, 1, 2048, 512]> transpose_17_cast_fp16 = transpose(perm = transpose_17_perm_0, x = reshape_16_cast_fp16)[name = string("transpose_62")];
	tensor<fp16, [8, 1, 2048, 512]> reshape_17_cast_fp16 = reshape(shape = concat_17, x = transpose_17_cast_fp16)[name = string("reshape_17_cast_fp16")];
	tensor<int32, [4]> transpose_44_perm_0 = const()[name = string("transpose_44_perm_0"), val = tensor<int32, [4]>([1, 0, -1, -2])];
	tensor<int32, [4]> transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_9_reps_0 = const()[name = string("tile_9_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])];
	tensor<fp16, [1, 1, 2048, 512]> transpose_18_cast_fp16 = transpose(perm = transpose_18_perm_0, x = kv14_v)[name = string("transpose_61")];
	tensor<fp16, [8, 1, 2048, 512]> tile_9_cast_fp16 = tile(reps = tile_9_reps_0, x = transpose_18_cast_fp16)[name = string("tile_9_cast_fp16")];
	tensor<int32, [5]> concat_18 = const()[name = string("concat_18"), val = tensor<int32, [5]>([8, 1, 1, 2048, 512])];
	tensor<fp16, [8, 1, 1, 2048, 512]> reshape_18_cast_fp16 = reshape(shape = concat_18, x = tile_9_cast_fp16)[name = string("reshape_18_cast_fp16")];
	tensor<int32, [5]> transpose_19_perm_0 = const()[name = string("transpose_19_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_19 = const()[name = string("concat_19"), val = tensor<int32, [4]>([-1, 1, 2048, 512])];
	tensor<fp16, [1, 8, 1, 2048, 512]> transpose_19_cast_fp16 = transpose(perm = transpose_19_perm_0, x = reshape_18_cast_fp16)[name = string("transpose_60")];
	tensor<fp16, [8, 1, 2048, 512]> reshape_19_cast_fp16 = reshape(shape = concat_19, x = transpose_19_cast_fp16)[name = string("reshape_19_cast_fp16")];
	tensor<int32, [4]> V_expanded_9_perm_0 = const()[name = string("V_expanded_9_perm_0"), val = tensor<int32, [4]>([1, 0, -2, -1])];
	bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)];
	bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 2048]> transpose_44_cast_fp16 = transpose(perm = transpose_44_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_59")];
	tensor<fp16, [1, 8, 1, 2048]> attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = q_29_cast_fp16, y = transpose_44_cast_fp16)[name = string("attn_weights_17_cast_fp16")];
	tensor<fp16, [1, 8, 1, 2048]> x_67_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask_full)[name = string("x_67_cast_fp16")];
	tensor<int32, [1]> reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> reduce_max_4 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_67_cast_fp16)[name = string("reduce_max_4")];
	tensor<fp16, [1, 8, 1, 2048]> var_2108 = sub(x = x_67_cast_fp16, y = reduce_max_4)[name = string("op_2108")];
	tensor<fp16, [1, 8, 1, 2048]> var_2114 = exp(x = var_2108)[name = string("op_2114")];
	tensor<int32, [1]> var_2124_axes_0 = const()[name = string("op_2124_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_2124_keep_dims_0 = const()[name = string("op_2124_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> var_2124 = reduce_sum(axes = var_2124_axes_0, keep_dims = var_2124_keep_dims_0, x = var_2114)[name = string("op_2124")];
	tensor<fp16, [1, 8, 1, 2048]> var_2130_cast_fp16 = real_div(x = var_2114, y = var_2124)[name = string("op_2130_cast_fp16")];
	bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)];
	bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 2048, 512]> V_expanded_9_cast_fp16 = transpose(perm = V_expanded_9_perm_0, x = reshape_19_cast_fp16)[name = string("transpose_58")];
	tensor<fp16, [1, 8, 1, 512]> attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = var_2130_cast_fp16, y = V_expanded_9_cast_fp16)[name = string("attn_output_25_cast_fp16")];
	tensor<int32, [4]> var_2141 = const()[name = string("op_2141"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_2148 = const()[name = string("op_2148"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 512]> var_2142_cast_fp16 = transpose(perm = var_2141, x = attn_output_25_cast_fp16)[name = string("transpose_57")];
	tensor<fp16, [1, 1, 4096]> attn_output_27_cast_fp16 = reshape(shape = var_2148, x = var_2142_cast_fp16)[name = string("attn_output_27_cast_fp16")];
	tensor<int32, [3]> var_2153 = const()[name = string("op_2153"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_2169_pad_type_0 = const()[name = string("op_2169_pad_type_0"), val = string("valid")];
	int32 var_2169_groups_0 = const()[name = string("op_2169_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_2169_strides_0 = const()[name = string("op_2169_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_2169_pad_0 = const()[name = string("op_2169_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_2169_dilations_0 = const()[name = string("op_2169_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 4096, 1]> squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 4096, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311428224))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314574016))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 4096, 1]> var_2154_cast_fp16 = transpose(perm = var_2153, x = attn_output_27_cast_fp16)[name = string("transpose_56")];
	tensor<fp16, [1, 1536, 1]> var_2169_cast_fp16 = conv(dilations = var_2169_dilations_0, groups = var_2169_groups_0, pad = var_2169_pad_0, pad_type = var_2169_pad_type_0, strides = var_2169_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_2154_cast_fp16)[name = string("op_2169_cast_fp16")];
	tensor<int32, [3]> var_2173 = const()[name = string("op_2173"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2179 = const()[name = string("op_2179"), val = int32(-1)];
	fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_71_cast_fp16 = transpose(perm = var_2173, x = var_2169_cast_fp16)[name = string("transpose_55")];
	tensor<fp16, [1, 1, 1536]> var_2181_cast_fp16 = mul(x = x_71_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_2181_cast_fp16")];
	bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_105_cast_fp16 = concat(axis = var_2179, interleave = input_105_interleave_0, values = (x_71_cast_fp16, var_2181_cast_fp16))[name = string("input_105_cast_fp16")];
	tensor<int32, [1]> normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2176_to_fp16 = const()[name = string("op_2176_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_2176_to_fp16, x = input_105_cast_fp16)[name = string("normed_105_cast_fp16")];
	tensor<int32, [2]> var_2186_split_sizes_0 = const()[name = string("op_2186_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2186_axis_0 = const()[name = string("op_2186_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2186_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2186_cast_fp16_1 = split(axis = var_2186_axis_0, split_sizes = var_2186_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_2186_cast_fp16")];
	tensor<fp16, [1536]> layers_4_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314575616)))];
	tensor<fp16, [1, 1, 1536]> attn_output_29_cast_fp16 = mul(x = var_2186_cast_fp16_0, y = layers_4_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_29_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_73_cast_fp16 = add(x = x_63_cast_fp16, y = attn_output_29_cast_fp16)[name = string("x_73_cast_fp16")];
	int32 var_2195 = const()[name = string("op_2195"), val = int32(-1)];
	fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_2197_cast_fp16 = mul(x = x_73_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_2197_cast_fp16")];
	bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_107_cast_fp16 = concat(axis = var_2195, interleave = input_107_interleave_0, values = (x_73_cast_fp16, var_2197_cast_fp16))[name = string("input_107_cast_fp16")];
	tensor<int32, [1]> normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2192_to_fp16 = const()[name = string("op_2192_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_2192_to_fp16, x = input_107_cast_fp16)[name = string("normed_109_cast_fp16")];
	tensor<int32, [2]> var_2202_split_sizes_0 = const()[name = string("op_2202_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2202_axis_0 = const()[name = string("op_2202_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2202_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2202_cast_fp16_1 = split(axis = var_2202_axis_0, split_sizes = var_2202_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_2202_cast_fp16")];
	tensor<fp16, [1536]> layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314578752)))];
	tensor<fp16, [1, 1, 1536]> h_27_cast_fp16 = mul(x = var_2202_cast_fp16_0, y = layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_27_cast_fp16")];
	tensor<int32, [3]> var_2213 = const()[name = string("op_2213"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2214 = transpose(perm = var_2213, x = h_27_cast_fp16)[name = string("transpose_54")];
	tensor<fp16, [1, 1536, 1, 1]> input_109 = expand_dims(axes = input_109_axes_0, x = var_2214)[name = string("input_109")];
	string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_17_groups_0 = const()[name = string("gate_17_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_109)[name = string("gate_17")];
	string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_9_strides_0 = const()[name = string("up_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_9_pad_0 = const()[name = string("up_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> up_9 = conv(dilations = up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_109)[name = string("up_9")];
	string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = string("gate_19")];
	tensor<fp16, [1, 12288, 1, 1]> input_111 = mul(x = gate_19, y = up_9)[name = string("input_111")];
	string mlp_out_9_pad_type_0 = const()[name = string("mlp_out_9_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_9_strides_0 = const()[name = string("mlp_out_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_9_pad_0 = const()[name = string("mlp_out_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_9_dilations_0 = const()[name = string("mlp_out_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_9_groups_0 = const()[name = string("mlp_out_9_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> mlp_out_9 = conv(dilations = mlp_out_9_dilations_0, groups = mlp_out_9_groups_0, pad = mlp_out_9_pad_0, pad_type = mlp_out_9_pad_type_0, strides = mlp_out_9_strides_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_111)[name = string("mlp_out_9")];
	tensor<int32, [1]> var_2254_axes_0 = const()[name = string("op_2254_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2254 = squeeze(axes = var_2254_axes_0, x = mlp_out_9)[name = string("op_2254")];
	tensor<int32, [3]> var_2258 = const()[name = string("op_2258"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2264 = const()[name = string("op_2264"), val = int32(-1)];
	fp16 const_37_promoted = const()[name = string("const_37_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_75 = transpose(perm = var_2258, x = var_2254)[name = string("transpose_53")];
	tensor<fp16, [1, 1, 1536]> var_2266 = mul(x = x_75, y = const_37_promoted)[name = string("op_2266")];
	bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_113 = concat(axis = var_2264, interleave = input_113_interleave_0, values = (x_75, var_2266))[name = string("input_113")];
	tensor<int32, [1]> normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2261_to_fp16 = const()[name = string("op_2261_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_2261_to_fp16, x = input_113)[name = string("normed_113_cast_fp16")];
	tensor<int32, [2]> var_2271_split_sizes_0 = const()[name = string("op_2271_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2271_axis_0 = const()[name = string("op_2271_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2271_0, tensor<fp16, [1, 1, 1536]> var_2271_1 = split(axis = var_2271_axis_0, split_sizes = var_2271_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_2271")];
	tensor<fp16, [1, 1, 1536]> hidden_states_43 = mul(x = var_2271_0, y = layers_4_post_feedforward_layernorm_weight)[name = string("hidden_states_43")];
	tensor<fp16, [1, 1, 1536]> hidden_states_45_cast_fp16 = add(x = x_73_cast_fp16, y = hidden_states_43)[name = string("hidden_states_45_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_9_begin_0 = const()[name = string("per_layer_slice_9_begin_0"), val = tensor<int32, [3]>([0, 0, 4864])];
	tensor<int32, [3]> per_layer_slice_9_end_0 = const()[name = string("per_layer_slice_9_end_0"), val = tensor<int32, [3]>([1, 1, 5120])];
	tensor<bool, [3]> per_layer_slice_9_end_mask_0 = const()[name = string("per_layer_slice_9_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> per_layer_slice_9_cast_fp16 = slice_by_index(begin = per_layer_slice_9_begin_0, end = per_layer_slice_9_end_0, end_mask = per_layer_slice_9_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_9_cast_fp16")];
	tensor<int32, [3]> var_2299 = const()[name = string("op_2299"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2300 = transpose(perm = var_2299, x = hidden_states_45_cast_fp16)[name = string("transpose_52")];
	tensor<fp16, [1, 1536, 1, 1]> input_115 = expand_dims(axes = input_115_axes_0, x = var_2300)[name = string("input_115")];
	string gated_25_pad_type_0 = const()[name = string("gated_25_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_25_strides_0 = const()[name = string("gated_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_25_pad_0 = const()[name = string("gated_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_25_dilations_0 = const()[name = string("gated_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_25_groups_0 = const()[name = string("gated_25_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 1]> gated_25 = conv(dilations = gated_25_dilations_0, groups = gated_25_groups_0, pad = gated_25_pad_0, pad_type = gated_25_pad_type_0, strides = gated_25_strides_0, weight = layers_4_per_layer_input_gate_weight_palettized, x = input_115)[name = string("gated_25")];
	string gated_27_mode_0 = const()[name = string("gated_27_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 1]> gated_27 = gelu(mode = gated_27_mode_0, x = gated_25)[name = string("gated_27")];
	tensor<int32, [3]> var_2319 = const()[name = string("op_2319"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_9_axes_0 = const()[name = string("per_layer_slice_conv_9_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 1]> var_2320_cast_fp16 = transpose(perm = var_2319, x = per_layer_slice_9_cast_fp16)[name = string("transpose_51")];
	tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_9_cast_fp16 = expand_dims(axes = per_layer_slice_conv_9_axes_0, x = var_2320_cast_fp16)[name = string("per_layer_slice_conv_9_cast_fp16")];
	tensor<fp16, [1, 256, 1, 1]> input_117_cast_fp16 = mul(x = gated_27, y = per_layer_slice_conv_9_cast_fp16)[name = string("input_117_cast_fp16")];
	string gated_29_pad_type_0 = const()[name = string("gated_29_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_29_strides_0 = const()[name = string("gated_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_29_pad_0 = const()[name = string("gated_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_29_dilations_0 = const()[name = string("gated_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_29_groups_0 = const()[name = string("gated_29_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_4_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314581888))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314778560))))[name = string("layers_4_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 1]> gated_29_cast_fp16 = conv(dilations = gated_29_dilations_0, groups = gated_29_groups_0, pad = gated_29_pad_0, pad_type = gated_29_pad_type_0, strides = gated_29_strides_0, weight = layers_4_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("gated_29_cast_fp16")];
	tensor<int32, [1]> var_2336_axes_0 = const()[name = string("op_2336_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2336_cast_fp16 = squeeze(axes = var_2336_axes_0, x = gated_29_cast_fp16)[name = string("op_2336_cast_fp16")];
	tensor<int32, [3]> var_2340 = const()[name = string("op_2340"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2346 = const()[name = string("op_2346"), val = int32(-1)];
	fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_77_cast_fp16 = transpose(perm = var_2340, x = var_2336_cast_fp16)[name = string("transpose_50")];
	tensor<fp16, [1, 1, 1536]> var_2348_cast_fp16 = mul(x = x_77_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2348_cast_fp16")];
	bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_119_cast_fp16 = concat(axis = var_2346, interleave = input_119_interleave_0, values = (x_77_cast_fp16, var_2348_cast_fp16))[name = string("input_119_cast_fp16")];
	tensor<int32, [1]> normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2343_to_fp16 = const()[name = string("op_2343_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_2343_to_fp16, x = input_119_cast_fp16)[name = string("normed_117_cast_fp16")];
	tensor<int32, [2]> var_2353_split_sizes_0 = const()[name = string("op_2353_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2353_axis_0 = const()[name = string("op_2353_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2353_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2353_cast_fp16_1 = split(axis = var_2353_axis_0, split_sizes = var_2353_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_2353_cast_fp16")];
	tensor<fp16, [1536]> layers_4_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314780160)))];
	tensor<fp16, [1, 1, 1536]> hidden_states_49_cast_fp16 = mul(x = var_2353_cast_fp16_0, y = layers_4_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_49_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_51_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = hidden_states_49_cast_fp16)[name = string("hidden_states_51_cast_fp16")];
	tensor<fp16, [1]> const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.14p-1])];
	tensor<fp16, [1, 1, 1536]> x_79_cast_fp16 = mul(x = hidden_states_51_cast_fp16, y = const_39_promoted_to_fp16)[name = string("x_79_cast_fp16")];
	int32 var_2368 = const()[name = string("op_2368"), val = int32(-1)];
	fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_2370_cast_fp16 = mul(x = x_79_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_2370_cast_fp16")];
	bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_121_cast_fp16 = concat(axis = var_2368, interleave = input_121_interleave_0, values = (x_79_cast_fp16, var_2370_cast_fp16))[name = string("input_121_cast_fp16")];
	tensor<int32, [1]> normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2365_to_fp16 = const()[name = string("op_2365_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_2365_to_fp16, x = input_121_cast_fp16)[name = string("normed_121_cast_fp16")];
	tensor<int32, [2]> var_2375_split_sizes_0 = const()[name = string("op_2375_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2375_axis_0 = const()[name = string("op_2375_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2375_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2375_cast_fp16_1 = split(axis = var_2375_axis_0, split_sizes = var_2375_split_sizes_0, x = normed_121_cast_fp16)[name = string("op_2375_cast_fp16")];
	tensor<fp16, [1536]> layers_5_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314783296)))];
	tensor<fp16, [1, 1, 1536]> h_31_cast_fp16 = mul(x = var_2375_cast_fp16_0, y = layers_5_input_layernorm_weight_promoted_to_fp16)[name = string("h_31_cast_fp16")];
	tensor<int32, [3]> var_2381 = const()[name = string("op_2381"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2384_axes_0 = const()[name = string("op_2384_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2382_cast_fp16 = transpose(perm = var_2381, x = h_31_cast_fp16)[name = string("transpose_49")];
	tensor<fp16, [1, 1536, 1, 1]> var_2384_cast_fp16 = expand_dims(axes = var_2384_axes_0, x = var_2382_cast_fp16)[name = string("op_2384_cast_fp16")];
	string var_2400_pad_type_0 = const()[name = string("op_2400_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2400_strides_0 = const()[name = string("op_2400_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2400_pad_0 = const()[name = string("op_2400_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2400_dilations_0 = const()[name = string("op_2400_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2400_groups_0 = const()[name = string("op_2400_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_2400 = conv(dilations = var_2400_dilations_0, groups = var_2400_groups_0, pad = var_2400_pad_0, pad_type = var_2400_pad_type_0, strides = var_2400_strides_0, weight = layers_5_self_attn_q_proj_weight_palettized, x = var_2384_cast_fp16)[name = string("op_2400")];
	tensor<int32, [4]> var_2405 = const()[name = string("op_2405"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_2406 = reshape(shape = var_2405, x = var_2400)[name = string("op_2406")];
	tensor<int32, [4]> var_2411 = const()[name = string("op_2411"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_2421 = const()[name = string("op_2421"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_2412 = transpose(perm = var_2411, x = var_2406)[name = string("transpose_48")];
	tensor<fp16, [1, 8, 256]> x_81 = reshape(shape = var_2421, x = var_2412)[name = string("x_81")];
	int32 var_2427 = const()[name = string("op_2427"), val = int32(-1)];
	fp16 const_41_promoted = const()[name = string("const_41_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_2429 = mul(x = x_81, y = const_41_promoted)[name = string("op_2429")];
	bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_125 = concat(axis = var_2427, interleave = input_125_interleave_0, values = (x_81, var_2429))[name = string("input_125")];
	tensor<int32, [1]> normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2424_to_fp16 = const()[name = string("op_2424_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_2424_to_fp16, x = input_125)[name = string("normed_125_cast_fp16")];
	tensor<int32, [2]> var_2434_split_sizes_0 = const()[name = string("op_2434_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_2434_axis_0 = const()[name = string("op_2434_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_2434_0, tensor<fp16, [1, 8, 256]> var_2434_1 = split(axis = var_2434_axis_0, split_sizes = var_2434_split_sizes_0, x = normed_125_cast_fp16)[name = string("op_2434")];
	tensor<fp16, [1, 8, 256]> var_2436 = mul(x = var_2434_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_2436")];
	tensor<int32, [4]> var_2441 = const()[name = string("op_2441"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_33 = reshape(shape = var_2441, x = var_2436)[name = string("q_33")];
	tensor<fp16, [1, 8, 1, 256]> var_2443_cast_fp16 = mul(x = q_33, y = cos_s)[name = string("op_2443_cast_fp16")];
	tensor<int32, [2]> var_2444_split_sizes_0 = const()[name = string("op_2444_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_2444_axis_0 = const()[name = string("op_2444_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_2444_0, tensor<fp16, [1, 8, 1, 128]> var_2444_1 = split(axis = var_2444_axis_0, split_sizes = var_2444_split_sizes_0, x = q_33)[name = string("op_2444")];
	fp16 const_42_promoted = const()[name = string("const_42_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_2446 = mul(x = var_2444_1, y = const_42_promoted)[name = string("op_2446")];
	int32 var_2448 = const()[name = string("op_2448"), val = int32(-1)];
	bool var_2449_interleave_0 = const()[name = string("op_2449_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_2449 = concat(axis = var_2448, interleave = var_2449_interleave_0, values = (var_2446, var_2444_0))[name = string("op_2449")];
	tensor<fp16, [1, 8, 1, 256]> var_2450_cast_fp16 = mul(x = var_2449, y = sin_s)[name = string("op_2450_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_35_cast_fp16 = add(x = var_2443_cast_fp16, y = var_2450_cast_fp16)[name = string("q_35_cast_fp16")];
	bool attn_weights_21_transpose_x_0 = const()[name = string("attn_weights_21_transpose_x_0"), val = bool(false)];
	bool attn_weights_21_transpose_y_0 = const()[name = string("attn_weights_21_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> attn_weights_21_cast_fp16 = matmul(transpose_x = attn_weights_21_transpose_x_0, transpose_y = attn_weights_21_transpose_y_0, x = q_35_cast_fp16, y = transpose_40_cast_fp16)[name = string("attn_weights_21_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> x_83_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask_sliding)[name = string("x_83_cast_fp16")];
	tensor<int32, [1]> reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> reduce_max_5 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_83_cast_fp16)[name = string("reduce_max_5")];
	tensor<fp16, [1, 8, 1, 512]> var_2482 = sub(x = x_83_cast_fp16, y = reduce_max_5)[name = string("op_2482")];
	tensor<fp16, [1, 8, 1, 512]> var_2488 = exp(x = var_2482)[name = string("op_2488")];
	tensor<int32, [1]> var_2498_axes_0 = const()[name = string("op_2498_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_2498_keep_dims_0 = const()[name = string("op_2498_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> var_2498 = reduce_sum(axes = var_2498_axes_0, keep_dims = var_2498_keep_dims_0, x = var_2488)[name = string("op_2498")];
	tensor<fp16, [1, 8, 1, 512]> var_2504_cast_fp16 = real_div(x = var_2488, y = var_2498)[name = string("op_2504_cast_fp16")];
	bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)];
	bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = var_2504_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_31_cast_fp16")];
	tensor<int32, [4]> var_2515 = const()[name = string("op_2515"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_2522 = const()[name = string("op_2522"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_2516_cast_fp16 = transpose(perm = var_2515, x = attn_output_31_cast_fp16)[name = string("transpose_47")];
	tensor<fp16, [1, 1, 2048]> attn_output_33_cast_fp16 = reshape(shape = var_2522, x = var_2516_cast_fp16)[name = string("attn_output_33_cast_fp16")];
	tensor<int32, [3]> var_2527 = const()[name = string("op_2527"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_2543_pad_type_0 = const()[name = string("op_2543_pad_type_0"), val = string("valid")];
	int32 var_2543_groups_0 = const()[name = string("op_2543_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_2543_strides_0 = const()[name = string("op_2543_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_2543_pad_0 = const()[name = string("op_2543_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_2543_dilations_0 = const()[name = string("op_2543_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314786432))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316359360))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 2048, 1]> var_2528_cast_fp16 = transpose(perm = var_2527, x = attn_output_33_cast_fp16)[name = string("transpose_46")];
	tensor<fp16, [1, 1536, 1]> var_2543_cast_fp16 = conv(dilations = var_2543_dilations_0, groups = var_2543_groups_0, pad = var_2543_pad_0, pad_type = var_2543_pad_type_0, strides = var_2543_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_2528_cast_fp16)[name = string("op_2543_cast_fp16")];
	tensor<int32, [3]> var_2547 = const()[name = string("op_2547"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2553 = const()[name = string("op_2553"), val = int32(-1)];
	fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_87_cast_fp16 = transpose(perm = var_2547, x = var_2543_cast_fp16)[name = string("transpose_45")];
	tensor<fp16, [1, 1, 1536]> var_2555_cast_fp16 = mul(x = x_87_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_2555_cast_fp16")];
	bool input_129_interleave_0 = const()[name = string("input_129_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_129_cast_fp16 = concat(axis = var_2553, interleave = input_129_interleave_0, values = (x_87_cast_fp16, var_2555_cast_fp16))[name = string("input_129_cast_fp16")];
	tensor<int32, [1]> normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2550_to_fp16 = const()[name = string("op_2550_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_2550_to_fp16, x = input_129_cast_fp16)[name = string("normed_129_cast_fp16")];
	tensor<int32, [2]> var_2560_split_sizes_0 = const()[name = string("op_2560_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2560_axis_0 = const()[name = string("op_2560_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2560_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2560_cast_fp16_1 = split(axis = var_2560_axis_0, split_sizes = var_2560_split_sizes_0, x = normed_129_cast_fp16)[name = string("op_2560_cast_fp16")];
	tensor<fp16, [1536]> layers_5_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316360960)))];
	tensor<fp16, [1, 1, 1536]> attn_output_35_cast_fp16 = mul(x = var_2560_cast_fp16_0, y = layers_5_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_35_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_89_cast_fp16 = add(x = x_79_cast_fp16, y = attn_output_35_cast_fp16)[name = string("x_89_cast_fp16")];
	int32 var_2569 = const()[name = string("op_2569"), val = int32(-1)];
	fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_2571_cast_fp16 = mul(x = x_89_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_2571_cast_fp16")];
	bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_131_cast_fp16 = concat(axis = var_2569, interleave = input_131_interleave_0, values = (x_89_cast_fp16, var_2571_cast_fp16))[name = string("input_131_cast_fp16")];
	tensor<int32, [1]> normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2566_to_fp16 = const()[name = string("op_2566_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_2566_to_fp16, x = input_131_cast_fp16)[name = string("normed_133_cast_fp16")];
	tensor<int32, [2]> var_2576_split_sizes_0 = const()[name = string("op_2576_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2576_axis_0 = const()[name = string("op_2576_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2576_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2576_cast_fp16_1 = split(axis = var_2576_axis_0, split_sizes = var_2576_split_sizes_0, x = normed_133_cast_fp16)[name = string("op_2576_cast_fp16")];
	tensor<fp16, [1536]> layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316364096)))];
	tensor<fp16, [1, 1, 1536]> h_33_cast_fp16 = mul(x = var_2576_cast_fp16_0, y = layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_33_cast_fp16")];
	tensor<int32, [3]> var_2587 = const()[name = string("op_2587"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_133_axes_0 = const()[name = string("input_133_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2588 = transpose(perm = var_2587, x = h_33_cast_fp16)[name = string("transpose_44")];
	tensor<fp16, [1, 1536, 1, 1]> input_133 = expand_dims(axes = input_133_axes_0, x = var_2588)[name = string("input_133")];
	string gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, strides = gate_21_strides_0, weight = layers_5_mlp_gate_proj_weight_palettized, x = input_133)[name = string("gate_21")];
	string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_11_groups_0 = const()[name = string("up_11_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = layers_5_mlp_up_proj_weight_palettized, x = input_133)[name = string("up_11")];
	string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")];
	tensor<fp16, [1, 12288, 1, 1]> input_135 = mul(x = gate_23, y = up_11)[name = string("input_135")];
	string mlp_out_11_pad_type_0 = const()[name = string("mlp_out_11_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_11_strides_0 = const()[name = string("mlp_out_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_11_pad_0 = const()[name = string("mlp_out_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_11_dilations_0 = const()[name = string("mlp_out_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_11_groups_0 = const()[name = string("mlp_out_11_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> mlp_out_11 = conv(dilations = mlp_out_11_dilations_0, groups = mlp_out_11_groups_0, pad = mlp_out_11_pad_0, pad_type = mlp_out_11_pad_type_0, strides = mlp_out_11_strides_0, weight = layers_5_mlp_down_proj_weight_palettized, x = input_135)[name = string("mlp_out_11")];
	tensor<int32, [1]> var_2628_axes_0 = const()[name = string("op_2628_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2628 = squeeze(axes = var_2628_axes_0, x = mlp_out_11)[name = string("op_2628")];
	tensor<int32, [3]> var_2632 = const()[name = string("op_2632"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2638 = const()[name = string("op_2638"), val = int32(-1)];
	fp16 const_45_promoted = const()[name = string("const_45_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_91 = transpose(perm = var_2632, x = var_2628)[name = string("transpose_43")];
	tensor<fp16, [1, 1, 1536]> var_2640 = mul(x = x_91, y = const_45_promoted)[name = string("op_2640")];
	bool input_137_interleave_0 = const()[name = string("input_137_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_137 = concat(axis = var_2638, interleave = input_137_interleave_0, values = (x_91, var_2640))[name = string("input_137")];
	tensor<int32, [1]> normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2635_to_fp16 = const()[name = string("op_2635_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_2635_to_fp16, x = input_137)[name = string("normed_137_cast_fp16")];
	tensor<int32, [2]> var_2645_split_sizes_0 = const()[name = string("op_2645_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2645_axis_0 = const()[name = string("op_2645_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2645_0, tensor<fp16, [1, 1, 1536]> var_2645_1 = split(axis = var_2645_axis_0, split_sizes = var_2645_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_2645")];
	tensor<fp16, [1, 1, 1536]> hidden_states_53 = mul(x = var_2645_0, y = layers_5_post_feedforward_layernorm_weight)[name = string("hidden_states_53")];
	tensor<fp16, [1, 1, 1536]> hidden_states_55_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_53)[name = string("hidden_states_55_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_11_begin_0 = const()[name = string("per_layer_slice_11_begin_0"), val = tensor<int32, [3]>([0, 0, 5120])];
	tensor<int32, [3]> per_layer_slice_11_end_0 = const()[name = string("per_layer_slice_11_end_0"), val = tensor<int32, [3]>([1, 1, 5376])];
	tensor<bool, [3]> per_layer_slice_11_end_mask_0 = const()[name = string("per_layer_slice_11_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> per_layer_slice_11_cast_fp16 = slice_by_index(begin = per_layer_slice_11_begin_0, end = per_layer_slice_11_end_0, end_mask = per_layer_slice_11_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_11_cast_fp16")];
	tensor<int32, [3]> var_2673 = const()[name = string("op_2673"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2674 = transpose(perm = var_2673, x = hidden_states_55_cast_fp16)[name = string("transpose_42")];
	tensor<fp16, [1, 1536, 1, 1]> input_139 = expand_dims(axes = input_139_axes_0, x = var_2674)[name = string("input_139")];
	string gated_31_pad_type_0 = const()[name = string("gated_31_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_31_strides_0 = const()[name = string("gated_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_31_pad_0 = const()[name = string("gated_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_31_dilations_0 = const()[name = string("gated_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_31_groups_0 = const()[name = string("gated_31_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 1]> gated_31 = conv(dilations = gated_31_dilations_0, groups = gated_31_groups_0, pad = gated_31_pad_0, pad_type = gated_31_pad_type_0, strides = gated_31_strides_0, weight = layers_5_per_layer_input_gate_weight_palettized, x = input_139)[name = string("gated_31")];
	string gated_33_mode_0 = const()[name = string("gated_33_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 1]> gated_33 = gelu(mode = gated_33_mode_0, x = gated_31)[name = string("gated_33")];
	tensor<int32, [3]> var_2693 = const()[name = string("op_2693"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_11_axes_0 = const()[name = string("per_layer_slice_conv_11_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 1]> var_2694_cast_fp16 = transpose(perm = var_2693, x = per_layer_slice_11_cast_fp16)[name = string("transpose_41")];
	tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_11_cast_fp16 = expand_dims(axes = per_layer_slice_conv_11_axes_0, x = var_2694_cast_fp16)[name = string("per_layer_slice_conv_11_cast_fp16")];
	tensor<fp16, [1, 256, 1, 1]> input_141_cast_fp16 = mul(x = gated_33, y = per_layer_slice_conv_11_cast_fp16)[name = string("input_141_cast_fp16")];
	string gated_35_pad_type_0 = const()[name = string("gated_35_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_35_strides_0 = const()[name = string("gated_35_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_35_pad_0 = const()[name = string("gated_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_35_dilations_0 = const()[name = string("gated_35_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_35_groups_0 = const()[name = string("gated_35_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_5_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316367232))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316563904))))[name = string("layers_5_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 1]> gated_35_cast_fp16 = conv(dilations = gated_35_dilations_0, groups = gated_35_groups_0, pad = gated_35_pad_0, pad_type = gated_35_pad_type_0, strides = gated_35_strides_0, weight = layers_5_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_141_cast_fp16)[name = string("gated_35_cast_fp16")];
	tensor<int32, [1]> var_2710_axes_0 = const()[name = string("op_2710_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2710_cast_fp16 = squeeze(axes = var_2710_axes_0, x = gated_35_cast_fp16)[name = string("op_2710_cast_fp16")];
	tensor<int32, [3]> var_2714 = const()[name = string("op_2714"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2720 = const()[name = string("op_2720"), val = int32(-1)];
	fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_93_cast_fp16 = transpose(perm = var_2714, x = var_2710_cast_fp16)[name = string("transpose_40")];
	tensor<fp16, [1, 1, 1536]> var_2722_cast_fp16 = mul(x = x_93_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_2722_cast_fp16")];
	bool input_143_interleave_0 = const()[name = string("input_143_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_143_cast_fp16 = concat(axis = var_2720, interleave = input_143_interleave_0, values = (x_93_cast_fp16, var_2722_cast_fp16))[name = string("input_143_cast_fp16")];
	tensor<int32, [1]> normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2717_to_fp16 = const()[name = string("op_2717_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_2717_to_fp16, x = input_143_cast_fp16)[name = string("normed_141_cast_fp16")];
	tensor<int32, [2]> var_2727_split_sizes_0 = const()[name = string("op_2727_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2727_axis_0 = const()[name = string("op_2727_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2727_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2727_cast_fp16_1 = split(axis = var_2727_axis_0, split_sizes = var_2727_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_2727_cast_fp16")];
	tensor<fp16, [1536]> layers_5_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316565504)))];
	tensor<fp16, [1, 1, 1536]> hidden_states_59_cast_fp16 = mul(x = var_2727_cast_fp16_0, y = layers_5_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_59_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_61_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("hidden_states_61_cast_fp16")];
	tensor<fp16, [1]> const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.fap-2])];
	tensor<fp16, [1, 1, 1536]> x_95_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_47_promoted_to_fp16)[name = string("x_95_cast_fp16")];
	int32 var_2742 = const()[name = string("op_2742"), val = int32(-1)];
	fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_2744_cast_fp16 = mul(x = x_95_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_2744_cast_fp16")];
	bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_145_cast_fp16 = concat(axis = var_2742, interleave = input_145_interleave_0, values = (x_95_cast_fp16, var_2744_cast_fp16))[name = string("input_145_cast_fp16")];
	tensor<int32, [1]> normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2739_to_fp16 = const()[name = string("op_2739_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_2739_to_fp16, x = input_145_cast_fp16)[name = string("normed_145_cast_fp16")];
	tensor<int32, [2]> var_2749_split_sizes_0 = const()[name = string("op_2749_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2749_axis_0 = const()[name = string("op_2749_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2749_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2749_cast_fp16_1 = split(axis = var_2749_axis_0, split_sizes = var_2749_split_sizes_0, x = normed_145_cast_fp16)[name = string("op_2749_cast_fp16")];
	tensor<fp16, [1536]> layers_6_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316568640)))];
	tensor<fp16, [1, 1, 1536]> h_37_cast_fp16 = mul(x = var_2749_cast_fp16_0, y = layers_6_input_layernorm_weight_promoted_to_fp16)[name = string("h_37_cast_fp16")];
	tensor<int32, [3]> var_2755 = const()[name = string("op_2755"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2758_axes_0 = const()[name = string("op_2758_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2756_cast_fp16 = transpose(perm = var_2755, x = h_37_cast_fp16)[name = string("transpose_39")];
	tensor<fp16, [1, 1536, 1, 1]> var_2758_cast_fp16 = expand_dims(axes = var_2758_axes_0, x = var_2756_cast_fp16)[name = string("op_2758_cast_fp16")];
	string var_2774_pad_type_0 = const()[name = string("op_2774_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2774_strides_0 = const()[name = string("op_2774_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2774_pad_0 = const()[name = string("op_2774_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2774_dilations_0 = const()[name = string("op_2774_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2774_groups_0 = const()[name = string("op_2774_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_2774 = conv(dilations = var_2774_dilations_0, groups = var_2774_groups_0, pad = var_2774_pad_0, pad_type = var_2774_pad_type_0, strides = var_2774_strides_0, weight = layers_6_self_attn_q_proj_weight_palettized, x = var_2758_cast_fp16)[name = string("op_2774")];
	tensor<int32, [4]> var_2779 = const()[name = string("op_2779"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_2780 = reshape(shape = var_2779, x = var_2774)[name = string("op_2780")];
	tensor<int32, [4]> var_2785 = const()[name = string("op_2785"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_2795 = const()[name = string("op_2795"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_2786 = transpose(perm = var_2785, x = var_2780)[name = string("transpose_38")];
	tensor<fp16, [1, 8, 256]> x_97 = reshape(shape = var_2795, x = var_2786)[name = string("x_97")];
	int32 var_2801 = const()[name = string("op_2801"), val = int32(-1)];
	fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_2803 = mul(x = x_97, y = const_49_promoted)[name = string("op_2803")];
	bool input_149_interleave_0 = const()[name = string("input_149_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_149 = concat(axis = var_2801, interleave = input_149_interleave_0, values = (x_97, var_2803))[name = string("input_149")];
	tensor<int32, [1]> normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2798_to_fp16 = const()[name = string("op_2798_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_2798_to_fp16, x = input_149)[name = string("normed_149_cast_fp16")];
	tensor<int32, [2]> var_2808_split_sizes_0 = const()[name = string("op_2808_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_2808_axis_0 = const()[name = string("op_2808_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_2808_0, tensor<fp16, [1, 8, 256]> var_2808_1 = split(axis = var_2808_axis_0, split_sizes = var_2808_split_sizes_0, x = normed_149_cast_fp16)[name = string("op_2808")];
	tensor<fp16, [1, 8, 256]> var_2810 = mul(x = var_2808_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_2810")];
	tensor<int32, [4]> var_2815 = const()[name = string("op_2815"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_39 = reshape(shape = var_2815, x = var_2810)[name = string("q_39")];
	tensor<fp16, [1, 8, 1, 256]> var_2817_cast_fp16 = mul(x = q_39, y = cos_s)[name = string("op_2817_cast_fp16")];
	tensor<int32, [2]> var_2818_split_sizes_0 = const()[name = string("op_2818_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_2818_axis_0 = const()[name = string("op_2818_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_2818_0, tensor<fp16, [1, 8, 1, 128]> var_2818_1 = split(axis = var_2818_axis_0, split_sizes = var_2818_split_sizes_0, x = q_39)[name = string("op_2818")];
	fp16 const_50_promoted = const()[name = string("const_50_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_2820 = mul(x = var_2818_1, y = const_50_promoted)[name = string("op_2820")];
	int32 var_2822 = const()[name = string("op_2822"), val = int32(-1)];
	bool var_2823_interleave_0 = const()[name = string("op_2823_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_2823 = concat(axis = var_2822, interleave = var_2823_interleave_0, values = (var_2820, var_2818_0))[name = string("op_2823")];
	tensor<fp16, [1, 8, 1, 256]> var_2824_cast_fp16 = mul(x = var_2823, y = sin_s)[name = string("op_2824_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_41_cast_fp16 = add(x = var_2817_cast_fp16, y = var_2824_cast_fp16)[name = string("q_41_cast_fp16")];
	bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)];
	bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = q_41_cast_fp16, y = transpose_40_cast_fp16)[name = string("attn_weights_25_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> x_99_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask_sliding)[name = string("x_99_cast_fp16")];
	tensor<int32, [1]> reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> reduce_max_6 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_99_cast_fp16)[name = string("reduce_max_6")];
	tensor<fp16, [1, 8, 1, 512]> var_2856 = sub(x = x_99_cast_fp16, y = reduce_max_6)[name = string("op_2856")];
	tensor<fp16, [1, 8, 1, 512]> var_2862 = exp(x = var_2856)[name = string("op_2862")];
	tensor<int32, [1]> var_2872_axes_0 = const()[name = string("op_2872_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_2872_keep_dims_0 = const()[name = string("op_2872_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> var_2872 = reduce_sum(axes = var_2872_axes_0, keep_dims = var_2872_keep_dims_0, x = var_2862)[name = string("op_2872")];
	tensor<fp16, [1, 8, 1, 512]> var_2878_cast_fp16 = real_div(x = var_2862, y = var_2872)[name = string("op_2878_cast_fp16")];
	bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)];
	bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = var_2878_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_37_cast_fp16")];
	tensor<int32, [4]> var_2889 = const()[name = string("op_2889"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_2896 = const()[name = string("op_2896"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_2890_cast_fp16 = transpose(perm = var_2889, x = attn_output_37_cast_fp16)[name = string("transpose_37")];
	tensor<fp16, [1, 1, 2048]> attn_output_39_cast_fp16 = reshape(shape = var_2896, x = var_2890_cast_fp16)[name = string("attn_output_39_cast_fp16")];
	tensor<int32, [3]> var_2901 = const()[name = string("op_2901"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_2917_pad_type_0 = const()[name = string("op_2917_pad_type_0"), val = string("valid")];
	int32 var_2917_groups_0 = const()[name = string("op_2917_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_2917_strides_0 = const()[name = string("op_2917_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_2917_pad_0 = const()[name = string("op_2917_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_2917_dilations_0 = const()[name = string("op_2917_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316571776))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318144704))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 2048, 1]> var_2902_cast_fp16 = transpose(perm = var_2901, x = attn_output_39_cast_fp16)[name = string("transpose_36")];
	tensor<fp16, [1, 1536, 1]> var_2917_cast_fp16 = conv(dilations = var_2917_dilations_0, groups = var_2917_groups_0, pad = var_2917_pad_0, pad_type = var_2917_pad_type_0, strides = var_2917_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_2902_cast_fp16)[name = string("op_2917_cast_fp16")];
	tensor<int32, [3]> var_2921 = const()[name = string("op_2921"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2927 = const()[name = string("op_2927"), val = int32(-1)];
	fp16 const_51_promoted_to_fp16 = const()[name = string("const_51_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_103_cast_fp16 = transpose(perm = var_2921, x = var_2917_cast_fp16)[name = string("transpose_35")];
	tensor<fp16, [1, 1, 1536]> var_2929_cast_fp16 = mul(x = x_103_cast_fp16, y = const_51_promoted_to_fp16)[name = string("op_2929_cast_fp16")];
	bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_153_cast_fp16 = concat(axis = var_2927, interleave = input_153_interleave_0, values = (x_103_cast_fp16, var_2929_cast_fp16))[name = string("input_153_cast_fp16")];
	tensor<int32, [1]> normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2924_to_fp16 = const()[name = string("op_2924_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_2924_to_fp16, x = input_153_cast_fp16)[name = string("normed_153_cast_fp16")];
	tensor<int32, [2]> var_2934_split_sizes_0 = const()[name = string("op_2934_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2934_axis_0 = const()[name = string("op_2934_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2934_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2934_cast_fp16_1 = split(axis = var_2934_axis_0, split_sizes = var_2934_split_sizes_0, x = normed_153_cast_fp16)[name = string("op_2934_cast_fp16")];
	tensor<fp16, [1536]> layers_6_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318146304)))];
	tensor<fp16, [1, 1, 1536]> attn_output_41_cast_fp16 = mul(x = var_2934_cast_fp16_0, y = layers_6_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_41_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_105_cast_fp16 = add(x = x_95_cast_fp16, y = attn_output_41_cast_fp16)[name = string("x_105_cast_fp16")];
	int32 var_2943 = const()[name = string("op_2943"), val = int32(-1)];
	fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_2945_cast_fp16 = mul(x = x_105_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_2945_cast_fp16")];
	bool input_155_interleave_0 = const()[name = string("input_155_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_155_cast_fp16 = concat(axis = var_2943, interleave = input_155_interleave_0, values = (x_105_cast_fp16, var_2945_cast_fp16))[name = string("input_155_cast_fp16")];
	tensor<int32, [1]> normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2940_to_fp16 = const()[name = string("op_2940_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_2940_to_fp16, x = input_155_cast_fp16)[name = string("normed_157_cast_fp16")];
	tensor<int32, [2]> var_2950_split_sizes_0 = const()[name = string("op_2950_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2950_axis_0 = const()[name = string("op_2950_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2950_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2950_cast_fp16_1 = split(axis = var_2950_axis_0, split_sizes = var_2950_split_sizes_0, x = normed_157_cast_fp16)[name = string("op_2950_cast_fp16")];
	tensor<fp16, [1536]> layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318149440)))];
	tensor<fp16, [1, 1, 1536]> h_39_cast_fp16 = mul(x = var_2950_cast_fp16_0, y = layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_39_cast_fp16")];
	tensor<int32, [3]> var_2961 = const()[name = string("op_2961"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_157_axes_0 = const()[name = string("input_157_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2962 = transpose(perm = var_2961, x = h_39_cast_fp16)[name = string("transpose_34")];
	tensor<fp16, [1, 1536, 1, 1]> input_157 = expand_dims(axes = input_157_axes_0, x = var_2962)[name = string("input_157")];
	string gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_25_groups_0 = const()[name = string("gate_25_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, strides = gate_25_strides_0, weight = layers_6_mlp_gate_proj_weight_palettized, x = input_157)[name = string("gate_25")];
	string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_13_groups_0 = const()[name = string("up_13_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = layers_6_mlp_up_proj_weight_palettized, x = input_157)[name = string("up_13")];
	string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")];
	tensor<fp16, [1, 12288, 1, 1]> input_159 = mul(x = gate_27, y = up_13)[name = string("input_159")];
	string mlp_out_13_pad_type_0 = const()[name = string("mlp_out_13_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_13_strides_0 = const()[name = string("mlp_out_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_13_pad_0 = const()[name = string("mlp_out_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_13_dilations_0 = const()[name = string("mlp_out_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_13_groups_0 = const()[name = string("mlp_out_13_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> mlp_out_13 = conv(dilations = mlp_out_13_dilations_0, groups = mlp_out_13_groups_0, pad = mlp_out_13_pad_0, pad_type = mlp_out_13_pad_type_0, strides = mlp_out_13_strides_0, weight = layers_6_mlp_down_proj_weight_palettized, x = input_159)[name = string("mlp_out_13")];
	tensor<int32, [1]> var_3002_axes_0 = const()[name = string("op_3002_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3002 = squeeze(axes = var_3002_axes_0, x = mlp_out_13)[name = string("op_3002")];
	tensor<int32, [3]> var_3006 = const()[name = string("op_3006"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3012 = const()[name = string("op_3012"), val = int32(-1)];
	fp16 const_53_promoted = const()[name = string("const_53_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_107 = transpose(perm = var_3006, x = var_3002)[name = string("transpose_33")];
	tensor<fp16, [1, 1, 1536]> var_3014 = mul(x = x_107, y = const_53_promoted)[name = string("op_3014")];
	bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_161 = concat(axis = var_3012, interleave = input_161_interleave_0, values = (x_107, var_3014))[name = string("input_161")];
	tensor<int32, [1]> normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3009_to_fp16 = const()[name = string("op_3009_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_3009_to_fp16, x = input_161)[name = string("normed_161_cast_fp16")];
	tensor<int32, [2]> var_3019_split_sizes_0 = const()[name = string("op_3019_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3019_axis_0 = const()[name = string("op_3019_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3019_0, tensor<fp16, [1, 1, 1536]> var_3019_1 = split(axis = var_3019_axis_0, split_sizes = var_3019_split_sizes_0, x = normed_161_cast_fp16)[name = string("op_3019")];
	tensor<fp16, [1, 1, 1536]> hidden_states_63 = mul(x = var_3019_0, y = layers_6_post_feedforward_layernorm_weight)[name = string("hidden_states_63")];
	tensor<fp16, [1, 1, 1536]> hidden_states_65_cast_fp16 = add(x = x_105_cast_fp16, y = hidden_states_63)[name = string("hidden_states_65_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_13_begin_0 = const()[name = string("per_layer_slice_13_begin_0"), val = tensor<int32, [3]>([0, 0, 5376])];
	tensor<int32, [3]> per_layer_slice_13_end_0 = const()[name = string("per_layer_slice_13_end_0"), val = tensor<int32, [3]>([1, 1, 5632])];
	tensor<bool, [3]> per_layer_slice_13_end_mask_0 = const()[name = string("per_layer_slice_13_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> per_layer_slice_13_cast_fp16 = slice_by_index(begin = per_layer_slice_13_begin_0, end = per_layer_slice_13_end_0, end_mask = per_layer_slice_13_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_13_cast_fp16")];
	tensor<int32, [3]> var_3047 = const()[name = string("op_3047"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_163_axes_0 = const()[name = string("input_163_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3048 = transpose(perm = var_3047, x = hidden_states_65_cast_fp16)[name = string("transpose_32")];
	tensor<fp16, [1, 1536, 1, 1]> input_163 = expand_dims(axes = input_163_axes_0, x = var_3048)[name = string("input_163")];
	string gated_37_pad_type_0 = const()[name = string("gated_37_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_37_strides_0 = const()[name = string("gated_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_37_pad_0 = const()[name = string("gated_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_37_dilations_0 = const()[name = string("gated_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_37_groups_0 = const()[name = string("gated_37_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 1]> gated_37 = conv(dilations = gated_37_dilations_0, groups = gated_37_groups_0, pad = gated_37_pad_0, pad_type = gated_37_pad_type_0, strides = gated_37_strides_0, weight = layers_6_per_layer_input_gate_weight_palettized, x = input_163)[name = string("gated_37")];
	string gated_39_mode_0 = const()[name = string("gated_39_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 1]> gated_39 = gelu(mode = gated_39_mode_0, x = gated_37)[name = string("gated_39")];
	tensor<int32, [3]> var_3067 = const()[name = string("op_3067"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_13_axes_0 = const()[name = string("per_layer_slice_conv_13_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 1]> var_3068_cast_fp16 = transpose(perm = var_3067, x = per_layer_slice_13_cast_fp16)[name = string("transpose_31")];
	tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_13_cast_fp16 = expand_dims(axes = per_layer_slice_conv_13_axes_0, x = var_3068_cast_fp16)[name = string("per_layer_slice_conv_13_cast_fp16")];
	tensor<fp16, [1, 256, 1, 1]> input_165_cast_fp16 = mul(x = gated_39, y = per_layer_slice_conv_13_cast_fp16)[name = string("input_165_cast_fp16")];
	string gated_41_pad_type_0 = const()[name = string("gated_41_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_41_strides_0 = const()[name = string("gated_41_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_41_pad_0 = const()[name = string("gated_41_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_41_dilations_0 = const()[name = string("gated_41_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_41_groups_0 = const()[name = string("gated_41_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_6_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318152576))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318349248))))[name = string("layers_6_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 1]> gated_41_cast_fp16 = conv(dilations = gated_41_dilations_0, groups = gated_41_groups_0, pad = gated_41_pad_0, pad_type = gated_41_pad_type_0, strides = gated_41_strides_0, weight = layers_6_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_165_cast_fp16)[name = string("gated_41_cast_fp16")];
	tensor<int32, [1]> var_3084_axes_0 = const()[name = string("op_3084_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3084_cast_fp16 = squeeze(axes = var_3084_axes_0, x = gated_41_cast_fp16)[name = string("op_3084_cast_fp16")];
	tensor<int32, [3]> var_3088 = const()[name = string("op_3088"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3094 = const()[name = string("op_3094"), val = int32(-1)];
	fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_109_cast_fp16 = transpose(perm = var_3088, x = var_3084_cast_fp16)[name = string("transpose_30")];
	tensor<fp16, [1, 1, 1536]> var_3096_cast_fp16 = mul(x = x_109_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_3096_cast_fp16")];
	bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_167_cast_fp16 = concat(axis = var_3094, interleave = input_167_interleave_0, values = (x_109_cast_fp16, var_3096_cast_fp16))[name = string("input_167_cast_fp16")];
	tensor<int32, [1]> normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3091_to_fp16 = const()[name = string("op_3091_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_3091_to_fp16, x = input_167_cast_fp16)[name = string("normed_165_cast_fp16")];
	tensor<int32, [2]> var_3101_split_sizes_0 = const()[name = string("op_3101_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3101_axis_0 = const()[name = string("op_3101_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3101_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3101_cast_fp16_1 = split(axis = var_3101_axis_0, split_sizes = var_3101_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_3101_cast_fp16")];
	tensor<fp16, [1536]> layers_6_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318350848)))];
	tensor<fp16, [1, 1, 1536]> hidden_states_69_cast_fp16 = mul(x = var_3101_cast_fp16_0, y = layers_6_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_69_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_71_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("hidden_states_71_cast_fp16")];
	tensor<fp16, [1]> const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.4ap-1])];
	tensor<fp16, [1, 1, 1536]> x_111_cast_fp16 = mul(x = hidden_states_71_cast_fp16, y = const_55_promoted_to_fp16)[name = string("x_111_cast_fp16")];
	int32 var_3116 = const()[name = string("op_3116"), val = int32(-1)];
	fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_3118_cast_fp16 = mul(x = x_111_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_3118_cast_fp16")];
	bool input_169_interleave_0 = const()[name = string("input_169_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_169_cast_fp16 = concat(axis = var_3116, interleave = input_169_interleave_0, values = (x_111_cast_fp16, var_3118_cast_fp16))[name = string("input_169_cast_fp16")];
	tensor<int32, [1]> normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3113_to_fp16 = const()[name = string("op_3113_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_3113_to_fp16, x = input_169_cast_fp16)[name = string("normed_169_cast_fp16")];
	tensor<int32, [2]> var_3123_split_sizes_0 = const()[name = string("op_3123_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3123_axis_0 = const()[name = string("op_3123_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3123_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3123_cast_fp16_1 = split(axis = var_3123_axis_0, split_sizes = var_3123_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_3123_cast_fp16")];
	tensor<fp16, [1536]> layers_7_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318353984)))];
	tensor<fp16, [1, 1, 1536]> h_43_cast_fp16 = mul(x = var_3123_cast_fp16_0, y = layers_7_input_layernorm_weight_promoted_to_fp16)[name = string("h_43_cast_fp16")];
	tensor<int32, [3]> var_3129 = const()[name = string("op_3129"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_3132_axes_0 = const()[name = string("op_3132_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3130_cast_fp16 = transpose(perm = var_3129, x = h_43_cast_fp16)[name = string("transpose_29")];
	tensor<fp16, [1, 1536, 1, 1]> var_3132_cast_fp16 = expand_dims(axes = var_3132_axes_0, x = var_3130_cast_fp16)[name = string("op_3132_cast_fp16")];
	string var_3148_pad_type_0 = const()[name = string("op_3148_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3148_strides_0 = const()[name = string("op_3148_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3148_pad_0 = const()[name = string("op_3148_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3148_dilations_0 = const()[name = string("op_3148_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3148_groups_0 = const()[name = string("op_3148_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_3148 = conv(dilations = var_3148_dilations_0, groups = var_3148_groups_0, pad = var_3148_pad_0, pad_type = var_3148_pad_type_0, strides = var_3148_strides_0, weight = layers_7_self_attn_q_proj_weight_palettized, x = var_3132_cast_fp16)[name = string("op_3148")];
	tensor<int32, [4]> var_3153 = const()[name = string("op_3153"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_3154 = reshape(shape = var_3153, x = var_3148)[name = string("op_3154")];
	tensor<int32, [4]> var_3159 = const()[name = string("op_3159"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_3169 = const()[name = string("op_3169"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_3160 = transpose(perm = var_3159, x = var_3154)[name = string("transpose_28")];
	tensor<fp16, [1, 8, 256]> x_113 = reshape(shape = var_3169, x = var_3160)[name = string("x_113")];
	int32 var_3175 = const()[name = string("op_3175"), val = int32(-1)];
	fp16 const_57_promoted = const()[name = string("const_57_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_3177 = mul(x = x_113, y = const_57_promoted)[name = string("op_3177")];
	bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_173 = concat(axis = var_3175, interleave = input_173_interleave_0, values = (x_113, var_3177))[name = string("input_173")];
	tensor<int32, [1]> normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3172_to_fp16 = const()[name = string("op_3172_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_3172_to_fp16, x = input_173)[name = string("normed_173_cast_fp16")];
	tensor<int32, [2]> var_3182_split_sizes_0 = const()[name = string("op_3182_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_3182_axis_0 = const()[name = string("op_3182_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_3182_0, tensor<fp16, [1, 8, 256]> var_3182_1 = split(axis = var_3182_axis_0, split_sizes = var_3182_split_sizes_0, x = normed_173_cast_fp16)[name = string("op_3182")];
	tensor<fp16, [1, 8, 256]> var_3184 = mul(x = var_3182_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_3184")];
	tensor<int32, [4]> var_3189 = const()[name = string("op_3189"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_45 = reshape(shape = var_3189, x = var_3184)[name = string("q_45")];
	tensor<fp16, [1, 8, 1, 256]> var_3191_cast_fp16 = mul(x = q_45, y = cos_s)[name = string("op_3191_cast_fp16")];
	tensor<int32, [2]> var_3192_split_sizes_0 = const()[name = string("op_3192_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_3192_axis_0 = const()[name = string("op_3192_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_3192_0, tensor<fp16, [1, 8, 1, 128]> var_3192_1 = split(axis = var_3192_axis_0, split_sizes = var_3192_split_sizes_0, x = q_45)[name = string("op_3192")];
	fp16 const_58_promoted = const()[name = string("const_58_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_3194 = mul(x = var_3192_1, y = const_58_promoted)[name = string("op_3194")];
	int32 var_3196 = const()[name = string("op_3196"), val = int32(-1)];
	bool var_3197_interleave_0 = const()[name = string("op_3197_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_3197 = concat(axis = var_3196, interleave = var_3197_interleave_0, values = (var_3194, var_3192_0))[name = string("op_3197")];
	tensor<fp16, [1, 8, 1, 256]> var_3198_cast_fp16 = mul(x = var_3197, y = sin_s)[name = string("op_3198_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_47_cast_fp16 = add(x = var_3191_cast_fp16, y = var_3198_cast_fp16)[name = string("q_47_cast_fp16")];
	bool attn_weights_29_transpose_x_0 = const()[name = string("attn_weights_29_transpose_x_0"), val = bool(false)];
	bool attn_weights_29_transpose_y_0 = const()[name = string("attn_weights_29_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> attn_weights_29_cast_fp16 = matmul(transpose_x = attn_weights_29_transpose_x_0, transpose_y = attn_weights_29_transpose_y_0, x = q_47_cast_fp16, y = transpose_40_cast_fp16)[name = string("attn_weights_29_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> x_115_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask_sliding)[name = string("x_115_cast_fp16")];
	tensor<int32, [1]> reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> reduce_max_7 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_115_cast_fp16)[name = string("reduce_max_7")];
	tensor<fp16, [1, 8, 1, 512]> var_3230 = sub(x = x_115_cast_fp16, y = reduce_max_7)[name = string("op_3230")];
	tensor<fp16, [1, 8, 1, 512]> var_3236 = exp(x = var_3230)[name = string("op_3236")];
	tensor<int32, [1]> var_3246_axes_0 = const()[name = string("op_3246_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_3246_keep_dims_0 = const()[name = string("op_3246_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> var_3246 = reduce_sum(axes = var_3246_axes_0, keep_dims = var_3246_keep_dims_0, x = var_3236)[name = string("op_3246")];
	tensor<fp16, [1, 8, 1, 512]> var_3252_cast_fp16 = real_div(x = var_3236, y = var_3246)[name = string("op_3252_cast_fp16")];
	bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)];
	bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = var_3252_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_43_cast_fp16")];
	tensor<int32, [4]> var_3263 = const()[name = string("op_3263"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_3270 = const()[name = string("op_3270"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_3264_cast_fp16 = transpose(perm = var_3263, x = attn_output_43_cast_fp16)[name = string("transpose_27")];
	tensor<fp16, [1, 1, 2048]> attn_output_45_cast_fp16 = reshape(shape = var_3270, x = var_3264_cast_fp16)[name = string("attn_output_45_cast_fp16")];
	tensor<int32, [3]> var_3275 = const()[name = string("op_3275"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_3291_pad_type_0 = const()[name = string("op_3291_pad_type_0"), val = string("valid")];
	int32 var_3291_groups_0 = const()[name = string("op_3291_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_3291_strides_0 = const()[name = string("op_3291_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_3291_pad_0 = const()[name = string("op_3291_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_3291_dilations_0 = const()[name = string("op_3291_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318357120))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319930048))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 2048, 1]> var_3276_cast_fp16 = transpose(perm = var_3275, x = attn_output_45_cast_fp16)[name = string("transpose_26")];
	tensor<fp16, [1, 1536, 1]> var_3291_cast_fp16 = conv(dilations = var_3291_dilations_0, groups = var_3291_groups_0, pad = var_3291_pad_0, pad_type = var_3291_pad_type_0, strides = var_3291_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_3276_cast_fp16)[name = string("op_3291_cast_fp16")];
	tensor<int32, [3]> var_3295 = const()[name = string("op_3295"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3301 = const()[name = string("op_3301"), val = int32(-1)];
	fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_119_cast_fp16 = transpose(perm = var_3295, x = var_3291_cast_fp16)[name = string("transpose_25")];
	tensor<fp16, [1, 1, 1536]> var_3303_cast_fp16 = mul(x = x_119_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_3303_cast_fp16")];
	bool input_177_interleave_0 = const()[name = string("input_177_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_177_cast_fp16 = concat(axis = var_3301, interleave = input_177_interleave_0, values = (x_119_cast_fp16, var_3303_cast_fp16))[name = string("input_177_cast_fp16")];
	tensor<int32, [1]> normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3298_to_fp16 = const()[name = string("op_3298_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_3298_to_fp16, x = input_177_cast_fp16)[name = string("normed_177_cast_fp16")];
	tensor<int32, [2]> var_3308_split_sizes_0 = const()[name = string("op_3308_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3308_axis_0 = const()[name = string("op_3308_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3308_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3308_cast_fp16_1 = split(axis = var_3308_axis_0, split_sizes = var_3308_split_sizes_0, x = normed_177_cast_fp16)[name = string("op_3308_cast_fp16")];
	tensor<fp16, [1536]> layers_7_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319931648)))];
	tensor<fp16, [1, 1, 1536]> attn_output_47_cast_fp16 = mul(x = var_3308_cast_fp16_0, y = layers_7_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_47_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_121_cast_fp16 = add(x = x_111_cast_fp16, y = attn_output_47_cast_fp16)[name = string("x_121_cast_fp16")];
	int32 var_3317 = const()[name = string("op_3317"), val = int32(-1)];
	fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_3319_cast_fp16 = mul(x = x_121_cast_fp16, y = const_60_promoted_to_fp16)[name = string("op_3319_cast_fp16")];
	bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_179_cast_fp16 = concat(axis = var_3317, interleave = input_179_interleave_0, values = (x_121_cast_fp16, var_3319_cast_fp16))[name = string("input_179_cast_fp16")];
	tensor<int32, [1]> normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3314_to_fp16 = const()[name = string("op_3314_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_3314_to_fp16, x = input_179_cast_fp16)[name = string("normed_181_cast_fp16")];
	tensor<int32, [2]> var_3324_split_sizes_0 = const()[name = string("op_3324_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3324_axis_0 = const()[name = string("op_3324_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3324_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3324_cast_fp16_1 = split(axis = var_3324_axis_0, split_sizes = var_3324_split_sizes_0, x = normed_181_cast_fp16)[name = string("op_3324_cast_fp16")];
	tensor<fp16, [1536]> layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319934784)))];
	tensor<fp16, [1, 1, 1536]> h_45_cast_fp16 = mul(x = var_3324_cast_fp16_0, y = layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_45_cast_fp16")];
	tensor<int32, [3]> var_3335 = const()[name = string("op_3335"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_181_axes_0 = const()[name = string("input_181_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3336 = transpose(perm = var_3335, x = h_45_cast_fp16)[name = string("transpose_24")];
	tensor<fp16, [1, 1536, 1, 1]> input_181 = expand_dims(axes = input_181_axes_0, x = var_3336)[name = string("input_181")];
	string gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> gate_29 = conv(dilations = gate_29_dilations_0, groups = gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = layers_7_mlp_gate_proj_weight_palettized, x = input_181)[name = string("gate_29")];
	string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> up_15 = conv(dilations = up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = layers_7_mlp_up_proj_weight_palettized, x = input_181)[name = string("up_15")];
	string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")];
	tensor<fp16, [1, 12288, 1, 1]> input_183 = mul(x = gate_31, y = up_15)[name = string("input_183")];
	string mlp_out_15_pad_type_0 = const()[name = string("mlp_out_15_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_15_strides_0 = const()[name = string("mlp_out_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_15_pad_0 = const()[name = string("mlp_out_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_15_dilations_0 = const()[name = string("mlp_out_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_15_groups_0 = const()[name = string("mlp_out_15_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> mlp_out_15 = conv(dilations = mlp_out_15_dilations_0, groups = mlp_out_15_groups_0, pad = mlp_out_15_pad_0, pad_type = mlp_out_15_pad_type_0, strides = mlp_out_15_strides_0, weight = layers_7_mlp_down_proj_weight_palettized, x = input_183)[name = string("mlp_out_15")];
	tensor<int32, [1]> var_3376_axes_0 = const()[name = string("op_3376_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3376 = squeeze(axes = var_3376_axes_0, x = mlp_out_15)[name = string("op_3376")];
	tensor<int32, [3]> var_3380 = const()[name = string("op_3380"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3386 = const()[name = string("op_3386"), val = int32(-1)];
	fp16 const_61_promoted = const()[name = string("const_61_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_123 = transpose(perm = var_3380, x = var_3376)[name = string("transpose_23")];
	tensor<fp16, [1, 1, 1536]> var_3388 = mul(x = x_123, y = const_61_promoted)[name = string("op_3388")];
	bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_185 = concat(axis = var_3386, interleave = input_185_interleave_0, values = (x_123, var_3388))[name = string("input_185")];
	tensor<int32, [1]> normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3383_to_fp16 = const()[name = string("op_3383_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_3383_to_fp16, x = input_185)[name = string("normed_185_cast_fp16")];
	tensor<int32, [2]> var_3393_split_sizes_0 = const()[name = string("op_3393_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3393_axis_0 = const()[name = string("op_3393_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3393_0, tensor<fp16, [1, 1, 1536]> var_3393_1 = split(axis = var_3393_axis_0, split_sizes = var_3393_split_sizes_0, x = normed_185_cast_fp16)[name = string("op_3393")];
	tensor<fp16, [1, 1, 1536]> hidden_states_73 = mul(x = var_3393_0, y = layers_7_post_feedforward_layernorm_weight)[name = string("hidden_states_73")];
	tensor<fp16, [1, 1, 1536]> hidden_states_75_cast_fp16 = add(x = x_121_cast_fp16, y = hidden_states_73)[name = string("hidden_states_75_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_15_begin_0 = const()[name = string("per_layer_slice_15_begin_0"), val = tensor<int32, [3]>([0, 0, 5632])];
	tensor<int32, [3]> per_layer_slice_15_end_0 = const()[name = string("per_layer_slice_15_end_0"), val = tensor<int32, [3]>([1, 1, 5888])];
	tensor<bool, [3]> per_layer_slice_15_end_mask_0 = const()[name = string("per_layer_slice_15_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> per_layer_slice_15_cast_fp16 = slice_by_index(begin = per_layer_slice_15_begin_0, end = per_layer_slice_15_end_0, end_mask = per_layer_slice_15_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_15_cast_fp16")];
	tensor<int32, [3]> var_3421 = const()[name = string("op_3421"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_187_axes_0 = const()[name = string("input_187_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3422 = transpose(perm = var_3421, x = hidden_states_75_cast_fp16)[name = string("transpose_22")];
	tensor<fp16, [1, 1536, 1, 1]> input_187 = expand_dims(axes = input_187_axes_0, x = var_3422)[name = string("input_187")];
	string gated_43_pad_type_0 = const()[name = string("gated_43_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_43_strides_0 = const()[name = string("gated_43_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_43_pad_0 = const()[name = string("gated_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_43_dilations_0 = const()[name = string("gated_43_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_43_groups_0 = const()[name = string("gated_43_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 1]> gated_43 = conv(dilations = gated_43_dilations_0, groups = gated_43_groups_0, pad = gated_43_pad_0, pad_type = gated_43_pad_type_0, strides = gated_43_strides_0, weight = layers_7_per_layer_input_gate_weight_palettized, x = input_187)[name = string("gated_43")];
	string gated_45_mode_0 = const()[name = string("gated_45_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 1]> gated_45 = gelu(mode = gated_45_mode_0, x = gated_43)[name = string("gated_45")];
	tensor<int32, [3]> var_3441 = const()[name = string("op_3441"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_15_axes_0 = const()[name = string("per_layer_slice_conv_15_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 1]> var_3442_cast_fp16 = transpose(perm = var_3441, x = per_layer_slice_15_cast_fp16)[name = string("transpose_21")];
	tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_15_cast_fp16 = expand_dims(axes = per_layer_slice_conv_15_axes_0, x = var_3442_cast_fp16)[name = string("per_layer_slice_conv_15_cast_fp16")];
	tensor<fp16, [1, 256, 1, 1]> input_189_cast_fp16 = mul(x = gated_45, y = per_layer_slice_conv_15_cast_fp16)[name = string("input_189_cast_fp16")];
	string gated_47_pad_type_0 = const()[name = string("gated_47_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_47_strides_0 = const()[name = string("gated_47_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_47_pad_0 = const()[name = string("gated_47_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_47_dilations_0 = const()[name = string("gated_47_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_47_groups_0 = const()[name = string("gated_47_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_7_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319937920))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320134592))))[name = string("layers_7_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 1]> gated_47_cast_fp16 = conv(dilations = gated_47_dilations_0, groups = gated_47_groups_0, pad = gated_47_pad_0, pad_type = gated_47_pad_type_0, strides = gated_47_strides_0, weight = layers_7_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_189_cast_fp16)[name = string("gated_47_cast_fp16")];
	tensor<int32, [1]> var_3458_axes_0 = const()[name = string("op_3458_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3458_cast_fp16 = squeeze(axes = var_3458_axes_0, x = gated_47_cast_fp16)[name = string("op_3458_cast_fp16")];
	tensor<int32, [3]> var_3462 = const()[name = string("op_3462"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3468 = const()[name = string("op_3468"), val = int32(-1)];
	fp16 const_62_promoted_to_fp16 = const()[name = string("const_62_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_125_cast_fp16 = transpose(perm = var_3462, x = var_3458_cast_fp16)[name = string("transpose_20")];
	tensor<fp16, [1, 1, 1536]> var_3470_cast_fp16 = mul(x = x_125_cast_fp16, y = const_62_promoted_to_fp16)[name = string("op_3470_cast_fp16")];
	bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_191_cast_fp16 = concat(axis = var_3468, interleave = input_191_interleave_0, values = (x_125_cast_fp16, var_3470_cast_fp16))[name = string("input_191_cast_fp16")];
	tensor<int32, [1]> normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3465_to_fp16 = const()[name = string("op_3465_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_3465_to_fp16, x = input_191_cast_fp16)[name = string("normed_189_cast_fp16")];
	tensor<int32, [2]> var_3475_split_sizes_0 = const()[name = string("op_3475_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3475_axis_0 = const()[name = string("op_3475_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3475_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3475_cast_fp16_1 = split(axis = var_3475_axis_0, split_sizes = var_3475_split_sizes_0, x = normed_189_cast_fp16)[name = string("op_3475_cast_fp16")];
	tensor<fp16, [1536]> layers_7_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320136192)))];
	tensor<fp16, [1, 1, 1536]> hidden_states_79_cast_fp16 = mul(x = var_3475_cast_fp16_0, y = layers_7_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_79_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")];
	tensor<fp16, [1]> const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.44p-1])];
	tensor<fp16, [1, 1, 1536]> x_127_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_63_promoted_to_fp16)[name = string("x_127_cast_fp16")];
	int32 var_3490 = const()[name = string("op_3490"), val = int32(-1)];
	fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_3492_cast_fp16 = mul(x = x_127_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_3492_cast_fp16")];
	bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_193_cast_fp16 = concat(axis = var_3490, interleave = input_193_interleave_0, values = (x_127_cast_fp16, var_3492_cast_fp16))[name = string("input_193_cast_fp16")];
	tensor<int32, [1]> normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3487_to_fp16 = const()[name = string("op_3487_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_3487_to_fp16, x = input_193_cast_fp16)[name = string("normed_193_cast_fp16")];
	tensor<int32, [2]> var_3497_split_sizes_0 = const()[name = string("op_3497_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3497_axis_0 = const()[name = string("op_3497_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3497_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3497_cast_fp16_1 = split(axis = var_3497_axis_0, split_sizes = var_3497_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_3497_cast_fp16")];
	tensor<fp16, [1536]> layers_8_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320139328)))];
	tensor<fp16, [1, 1, 1536]> h_49_cast_fp16 = mul(x = var_3497_cast_fp16_0, y = layers_8_input_layernorm_weight_promoted_to_fp16)[name = string("h_49_cast_fp16")];
	tensor<int32, [3]> var_3503 = const()[name = string("op_3503"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_3506_axes_0 = const()[name = string("op_3506_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3504_cast_fp16 = transpose(perm = var_3503, x = h_49_cast_fp16)[name = string("transpose_19")];
	tensor<fp16, [1, 1536, 1, 1]> var_3506_cast_fp16 = expand_dims(axes = var_3506_axes_0, x = var_3504_cast_fp16)[name = string("op_3506_cast_fp16")];
	string var_3522_pad_type_0 = const()[name = string("op_3522_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3522_strides_0 = const()[name = string("op_3522_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3522_pad_0 = const()[name = string("op_3522_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3522_dilations_0 = const()[name = string("op_3522_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3522_groups_0 = const()[name = string("op_3522_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_3522 = conv(dilations = var_3522_dilations_0, groups = var_3522_groups_0, pad = var_3522_pad_0, pad_type = var_3522_pad_type_0, strides = var_3522_strides_0, weight = layers_8_self_attn_q_proj_weight_palettized, x = var_3506_cast_fp16)[name = string("op_3522")];
	tensor<int32, [4]> var_3527 = const()[name = string("op_3527"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_3528 = reshape(shape = var_3527, x = var_3522)[name = string("op_3528")];
	tensor<int32, [4]> var_3533 = const()[name = string("op_3533"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_3543 = const()[name = string("op_3543"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_3534 = transpose(perm = var_3533, x = var_3528)[name = string("transpose_18")];
	tensor<fp16, [1, 8, 256]> x_129 = reshape(shape = var_3543, x = var_3534)[name = string("x_129")];
	int32 var_3549 = const()[name = string("op_3549"), val = int32(-1)];
	fp16 const_65_promoted = const()[name = string("const_65_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_3551 = mul(x = x_129, y = const_65_promoted)[name = string("op_3551")];
	bool input_197_interleave_0 = const()[name = string("input_197_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_197 = concat(axis = var_3549, interleave = input_197_interleave_0, values = (x_129, var_3551))[name = string("input_197")];
	tensor<int32, [1]> normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3546_to_fp16 = const()[name = string("op_3546_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_3546_to_fp16, x = input_197)[name = string("normed_197_cast_fp16")];
	tensor<int32, [2]> var_3556_split_sizes_0 = const()[name = string("op_3556_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_3556_axis_0 = const()[name = string("op_3556_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_3556_0, tensor<fp16, [1, 8, 256]> var_3556_1 = split(axis = var_3556_axis_0, split_sizes = var_3556_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_3556")];
	tensor<fp16, [1, 8, 256]> var_3558 = mul(x = var_3556_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_3558")];
	tensor<int32, [4]> var_3563 = const()[name = string("op_3563"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_51 = reshape(shape = var_3563, x = var_3558)[name = string("q_51")];
	tensor<fp16, [1, 8, 1, 256]> var_3565_cast_fp16 = mul(x = q_51, y = cos_s)[name = string("op_3565_cast_fp16")];
	tensor<int32, [2]> var_3566_split_sizes_0 = const()[name = string("op_3566_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_3566_axis_0 = const()[name = string("op_3566_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_3566_0, tensor<fp16, [1, 8, 1, 128]> var_3566_1 = split(axis = var_3566_axis_0, split_sizes = var_3566_split_sizes_0, x = q_51)[name = string("op_3566")];
	fp16 const_66_promoted = const()[name = string("const_66_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_3568 = mul(x = var_3566_1, y = const_66_promoted)[name = string("op_3568")];
	int32 var_3570 = const()[name = string("op_3570"), val = int32(-1)];
	bool var_3571_interleave_0 = const()[name = string("op_3571_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_3571 = concat(axis = var_3570, interleave = var_3571_interleave_0, values = (var_3568, var_3566_0))[name = string("op_3571")];
	tensor<fp16, [1, 8, 1, 256]> var_3572_cast_fp16 = mul(x = var_3571, y = sin_s)[name = string("op_3572_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_53_cast_fp16 = add(x = var_3565_cast_fp16, y = var_3572_cast_fp16)[name = string("q_53_cast_fp16")];
	bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)];
	bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = q_53_cast_fp16, y = transpose_40_cast_fp16)[name = string("attn_weights_33_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> x_131_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask_sliding)[name = string("x_131_cast_fp16")];
	tensor<int32, [1]> reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> reduce_max_8 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_131_cast_fp16)[name = string("reduce_max_8")];
	tensor<fp16, [1, 8, 1, 512]> var_3604 = sub(x = x_131_cast_fp16, y = reduce_max_8)[name = string("op_3604")];
	tensor<fp16, [1, 8, 1, 512]> var_3610 = exp(x = var_3604)[name = string("op_3610")];
	tensor<int32, [1]> var_3620_axes_0 = const()[name = string("op_3620_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_3620_keep_dims_0 = const()[name = string("op_3620_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> var_3620 = reduce_sum(axes = var_3620_axes_0, keep_dims = var_3620_keep_dims_0, x = var_3610)[name = string("op_3620")];
	tensor<fp16, [1, 8, 1, 512]> var_3626_cast_fp16 = real_div(x = var_3610, y = var_3620)[name = string("op_3626_cast_fp16")];
	bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)];
	bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = var_3626_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_49_cast_fp16")];
	tensor<int32, [4]> var_3637 = const()[name = string("op_3637"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_3644 = const()[name = string("op_3644"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_3638_cast_fp16 = transpose(perm = var_3637, x = attn_output_49_cast_fp16)[name = string("transpose_17")];
	tensor<fp16, [1, 1, 2048]> attn_output_51_cast_fp16 = reshape(shape = var_3644, x = var_3638_cast_fp16)[name = string("attn_output_51_cast_fp16")];
	tensor<int32, [3]> var_3649 = const()[name = string("op_3649"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_3665_pad_type_0 = const()[name = string("op_3665_pad_type_0"), val = string("valid")];
	int32 var_3665_groups_0 = const()[name = string("op_3665_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_3665_strides_0 = const()[name = string("op_3665_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_3665_pad_0 = const()[name = string("op_3665_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_3665_dilations_0 = const()[name = string("op_3665_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320142464))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321715392))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 2048, 1]> var_3650_cast_fp16 = transpose(perm = var_3649, x = attn_output_51_cast_fp16)[name = string("transpose_16")];
	tensor<fp16, [1, 1536, 1]> var_3665_cast_fp16 = conv(dilations = var_3665_dilations_0, groups = var_3665_groups_0, pad = var_3665_pad_0, pad_type = var_3665_pad_type_0, strides = var_3665_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_3650_cast_fp16)[name = string("op_3665_cast_fp16")];
	tensor<int32, [3]> var_3669 = const()[name = string("op_3669"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3675 = const()[name = string("op_3675"), val = int32(-1)];
	fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_135_cast_fp16 = transpose(perm = var_3669, x = var_3665_cast_fp16)[name = string("transpose_15")];
	tensor<fp16, [1, 1, 1536]> var_3677_cast_fp16 = mul(x = x_135_cast_fp16, y = const_67_promoted_to_fp16)[name = string("op_3677_cast_fp16")];
	bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_201_cast_fp16 = concat(axis = var_3675, interleave = input_201_interleave_0, values = (x_135_cast_fp16, var_3677_cast_fp16))[name = string("input_201_cast_fp16")];
	tensor<int32, [1]> normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3672_to_fp16 = const()[name = string("op_3672_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_3672_to_fp16, x = input_201_cast_fp16)[name = string("normed_201_cast_fp16")];
	tensor<int32, [2]> var_3682_split_sizes_0 = const()[name = string("op_3682_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3682_axis_0 = const()[name = string("op_3682_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3682_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3682_cast_fp16_1 = split(axis = var_3682_axis_0, split_sizes = var_3682_split_sizes_0, x = normed_201_cast_fp16)[name = string("op_3682_cast_fp16")];
	tensor<fp16, [1536]> layers_8_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321716992)))];
	tensor<fp16, [1, 1, 1536]> attn_output_53_cast_fp16 = mul(x = var_3682_cast_fp16_0, y = layers_8_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_53_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_137_cast_fp16 = add(x = x_127_cast_fp16, y = attn_output_53_cast_fp16)[name = string("x_137_cast_fp16")];
	int32 var_3691 = const()[name = string("op_3691"), val = int32(-1)];
	fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_3693_cast_fp16 = mul(x = x_137_cast_fp16, y = const_68_promoted_to_fp16)[name = string("op_3693_cast_fp16")];
	bool input_203_interleave_0 = const()[name = string("input_203_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_203_cast_fp16 = concat(axis = var_3691, interleave = input_203_interleave_0, values = (x_137_cast_fp16, var_3693_cast_fp16))[name = string("input_203_cast_fp16")];
	tensor<int32, [1]> normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3688_to_fp16 = const()[name = string("op_3688_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_3688_to_fp16, x = input_203_cast_fp16)[name = string("normed_205_cast_fp16")];
	tensor<int32, [2]> var_3698_split_sizes_0 = const()[name = string("op_3698_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3698_axis_0 = const()[name = string("op_3698_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3698_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3698_cast_fp16_1 = split(axis = var_3698_axis_0, split_sizes = var_3698_split_sizes_0, x = normed_205_cast_fp16)[name = string("op_3698_cast_fp16")];
	tensor<fp16, [1536]> layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321720128)))];
	tensor<fp16, [1, 1, 1536]> h_51_cast_fp16 = mul(x = var_3698_cast_fp16_0, y = layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_51_cast_fp16")];
	tensor<int32, [3]> var_3709 = const()[name = string("op_3709"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_205_axes_0 = const()[name = string("input_205_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3710 = transpose(perm = var_3709, x = h_51_cast_fp16)[name = string("transpose_14")];
	tensor<fp16, [1, 1536, 1, 1]> input_205 = expand_dims(axes = input_205_axes_0, x = var_3710)[name = string("input_205")];
	string gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = layers_8_mlp_gate_proj_weight_palettized, x = input_205)[name = string("gate_33")];
	string up_17_pad_type_0 = const()[name = string("up_17_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_17_strides_0 = const()[name = string("up_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_17_pad_0 = const()[name = string("up_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_17_dilations_0 = const()[name = string("up_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_17_groups_0 = const()[name = string("up_17_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> up_17 = conv(dilations = up_17_dilations_0, groups = up_17_groups_0, pad = up_17_pad_0, pad_type = up_17_pad_type_0, strides = up_17_strides_0, weight = layers_8_mlp_up_proj_weight_palettized, x = input_205)[name = string("up_17")];
	string gate_35_mode_0 = const()[name = string("gate_35_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> gate_35 = gelu(mode = gate_35_mode_0, x = gate_33)[name = string("gate_35")];
	tensor<fp16, [1, 12288, 1, 1]> input_207 = mul(x = gate_35, y = up_17)[name = string("input_207")];
	string mlp_out_17_pad_type_0 = const()[name = string("mlp_out_17_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_17_strides_0 = const()[name = string("mlp_out_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_17_pad_0 = const()[name = string("mlp_out_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_17_dilations_0 = const()[name = string("mlp_out_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_17_groups_0 = const()[name = string("mlp_out_17_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> mlp_out_17 = conv(dilations = mlp_out_17_dilations_0, groups = mlp_out_17_groups_0, pad = mlp_out_17_pad_0, pad_type = mlp_out_17_pad_type_0, strides = mlp_out_17_strides_0, weight = layers_8_mlp_down_proj_weight_palettized, x = input_207)[name = string("mlp_out_17")];
	tensor<int32, [1]> var_3750_axes_0 = const()[name = string("op_3750_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3750 = squeeze(axes = var_3750_axes_0, x = mlp_out_17)[name = string("op_3750")];
	tensor<int32, [3]> var_3754 = const()[name = string("op_3754"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3760 = const()[name = string("op_3760"), val = int32(-1)];
	fp16 const_69_promoted = const()[name = string("const_69_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_139 = transpose(perm = var_3754, x = var_3750)[name = string("transpose_13")];
	tensor<fp16, [1, 1, 1536]> var_3762 = mul(x = x_139, y = const_69_promoted)[name = string("op_3762")];
	bool input_209_interleave_0 = const()[name = string("input_209_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_209 = concat(axis = var_3760, interleave = input_209_interleave_0, values = (x_139, var_3762))[name = string("input_209")];
	tensor<int32, [1]> normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3757_to_fp16 = const()[name = string("op_3757_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_3757_to_fp16, x = input_209)[name = string("normed_209_cast_fp16")];
	tensor<int32, [2]> var_3767_split_sizes_0 = const()[name = string("op_3767_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3767_axis_0 = const()[name = string("op_3767_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3767_0, tensor<fp16, [1, 1, 1536]> var_3767_1 = split(axis = var_3767_axis_0, split_sizes = var_3767_split_sizes_0, x = normed_209_cast_fp16)[name = string("op_3767")];
	tensor<fp16, [1, 1, 1536]> hidden_states_83 = mul(x = var_3767_0, y = layers_8_post_feedforward_layernorm_weight)[name = string("hidden_states_83")];
	tensor<fp16, [1, 1, 1536]> hidden_states_85_cast_fp16 = add(x = x_137_cast_fp16, y = hidden_states_83)[name = string("hidden_states_85_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_17_begin_0 = const()[name = string("per_layer_slice_17_begin_0"), val = tensor<int32, [3]>([0, 0, 5888])];
	tensor<int32, [3]> per_layer_slice_17_end_0 = const()[name = string("per_layer_slice_17_end_0"), val = tensor<int32, [3]>([1, 1, 6144])];
	tensor<bool, [3]> per_layer_slice_17_end_mask_0 = const()[name = string("per_layer_slice_17_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> per_layer_slice_17_cast_fp16 = slice_by_index(begin = per_layer_slice_17_begin_0, end = per_layer_slice_17_end_0, end_mask = per_layer_slice_17_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_17_cast_fp16")];
	tensor<int32, [3]> var_3795 = const()[name = string("op_3795"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_211_axes_0 = const()[name = string("input_211_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3796 = transpose(perm = var_3795, x = hidden_states_85_cast_fp16)[name = string("transpose_12")];
	tensor<fp16, [1, 1536, 1, 1]> input_211 = expand_dims(axes = input_211_axes_0, x = var_3796)[name = string("input_211")];
	string gated_49_pad_type_0 = const()[name = string("gated_49_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_49_strides_0 = const()[name = string("gated_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_49_pad_0 = const()[name = string("gated_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_49_dilations_0 = const()[name = string("gated_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_49_groups_0 = const()[name = string("gated_49_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 1]> gated_49 = conv(dilations = gated_49_dilations_0, groups = gated_49_groups_0, pad = gated_49_pad_0, pad_type = gated_49_pad_type_0, strides = gated_49_strides_0, weight = layers_8_per_layer_input_gate_weight_palettized, x = input_211)[name = string("gated_49")];
	string gated_51_mode_0 = const()[name = string("gated_51_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 1]> gated_51 = gelu(mode = gated_51_mode_0, x = gated_49)[name = string("gated_51")];
	tensor<int32, [3]> var_3815 = const()[name = string("op_3815"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_17_axes_0 = const()[name = string("per_layer_slice_conv_17_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 1]> var_3816_cast_fp16 = transpose(perm = var_3815, x = per_layer_slice_17_cast_fp16)[name = string("transpose_11")];
	tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_17_cast_fp16 = expand_dims(axes = per_layer_slice_conv_17_axes_0, x = var_3816_cast_fp16)[name = string("per_layer_slice_conv_17_cast_fp16")];
	tensor<fp16, [1, 256, 1, 1]> input_213_cast_fp16 = mul(x = gated_51, y = per_layer_slice_conv_17_cast_fp16)[name = string("input_213_cast_fp16")];
	string gated_53_pad_type_0 = const()[name = string("gated_53_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_53_strides_0 = const()[name = string("gated_53_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_53_pad_0 = const()[name = string("gated_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_53_dilations_0 = const()[name = string("gated_53_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_53_groups_0 = const()[name = string("gated_53_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_8_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321723264))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321919936))))[name = string("layers_8_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 1]> gated_53_cast_fp16 = conv(dilations = gated_53_dilations_0, groups = gated_53_groups_0, pad = gated_53_pad_0, pad_type = gated_53_pad_type_0, strides = gated_53_strides_0, weight = layers_8_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_213_cast_fp16)[name = string("gated_53_cast_fp16")];
	tensor<int32, [1]> var_3832_axes_0 = const()[name = string("op_3832_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3832_cast_fp16 = squeeze(axes = var_3832_axes_0, x = gated_53_cast_fp16)[name = string("op_3832_cast_fp16")];
	tensor<int32, [3]> var_3836 = const()[name = string("op_3836"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3842 = const()[name = string("op_3842"), val = int32(-1)];
	fp16 const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_141_cast_fp16 = transpose(perm = var_3836, x = var_3832_cast_fp16)[name = string("transpose_10")];
	tensor<fp16, [1, 1, 1536]> var_3844_cast_fp16 = mul(x = x_141_cast_fp16, y = const_70_promoted_to_fp16)[name = string("op_3844_cast_fp16")];
	bool input_215_interleave_0 = const()[name = string("input_215_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_215_cast_fp16 = concat(axis = var_3842, interleave = input_215_interleave_0, values = (x_141_cast_fp16, var_3844_cast_fp16))[name = string("input_215_cast_fp16")];
	tensor<int32, [1]> normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3839_to_fp16 = const()[name = string("op_3839_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_3839_to_fp16, x = input_215_cast_fp16)[name = string("normed_213_cast_fp16")];
	tensor<int32, [2]> var_3849_split_sizes_0 = const()[name = string("op_3849_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3849_axis_0 = const()[name = string("op_3849_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3849_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3849_cast_fp16_1 = split(axis = var_3849_axis_0, split_sizes = var_3849_split_sizes_0, x = normed_213_cast_fp16)[name = string("op_3849_cast_fp16")];
	tensor<fp16, [1536]> layers_8_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321921536)))];
	tensor<fp16, [1, 1, 1536]> hidden_states_89_cast_fp16 = mul(x = var_3849_cast_fp16_0, y = layers_8_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_89_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_91_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("hidden_states_91_cast_fp16")];
	tensor<fp16, [1]> const_71_promoted_to_fp16 = const()[name = string("const_71_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.bap-2])];
	tensor<fp16, [1, 1, 1536]> x_143_cast_fp16 = mul(x = hidden_states_91_cast_fp16, y = const_71_promoted_to_fp16)[name = string("x_143_cast_fp16")];
	int32 var_3864 = const()[name = string("op_3864"), val = int32(-1)];
	fp16 const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_3866_cast_fp16 = mul(x = x_143_cast_fp16, y = const_72_promoted_to_fp16)[name = string("op_3866_cast_fp16")];
	bool input_217_interleave_0 = const()[name = string("input_217_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_217_cast_fp16 = concat(axis = var_3864, interleave = input_217_interleave_0, values = (x_143_cast_fp16, var_3866_cast_fp16))[name = string("input_217_cast_fp16")];
	tensor<int32, [1]> normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3861_to_fp16 = const()[name = string("op_3861_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_3861_to_fp16, x = input_217_cast_fp16)[name = string("normed_217_cast_fp16")];
	tensor<int32, [2]> var_3871_split_sizes_0 = const()[name = string("op_3871_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3871_axis_0 = const()[name = string("op_3871_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3871_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3871_cast_fp16_1 = split(axis = var_3871_axis_0, split_sizes = var_3871_split_sizes_0, x = normed_217_cast_fp16)[name = string("op_3871_cast_fp16")];
	tensor<fp16, [1536]> layers_9_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321924672)))];
	tensor<fp16, [1, 1, 1536]> h_55_cast_fp16 = mul(x = var_3871_cast_fp16_0, y = layers_9_input_layernorm_weight_promoted_to_fp16)[name = string("h_55_cast_fp16")];
	tensor<int32, [3]> var_3877 = const()[name = string("op_3877"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_3880_axes_0 = const()[name = string("op_3880_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3878_cast_fp16 = transpose(perm = var_3877, x = h_55_cast_fp16)[name = string("transpose_9")];
	tensor<fp16, [1, 1536, 1, 1]> var_3880_cast_fp16 = expand_dims(axes = var_3880_axes_0, x = var_3878_cast_fp16)[name = string("op_3880_cast_fp16")];
	string var_3896_pad_type_0 = const()[name = string("op_3896_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3896_strides_0 = const()[name = string("op_3896_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3896_pad_0 = const()[name = string("op_3896_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3896_dilations_0 = const()[name = string("op_3896_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3896_groups_0 = const()[name = string("op_3896_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4096, 1, 1]> var_3896 = conv(dilations = var_3896_dilations_0, groups = var_3896_groups_0, pad = var_3896_pad_0, pad_type = var_3896_pad_type_0, strides = var_3896_strides_0, weight = layers_9_self_attn_q_proj_weight_palettized, x = var_3880_cast_fp16)[name = string("op_3896")];
	tensor<int32, [4]> var_3901 = const()[name = string("op_3901"), val = tensor<int32, [4]>([1, 8, 512, 1])];
	tensor<fp16, [1, 8, 512, 1]> var_3902 = reshape(shape = var_3901, x = var_3896)[name = string("op_3902")];
	tensor<int32, [4]> var_3907 = const()[name = string("op_3907"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_3917 = const()[name = string("op_3917"), val = tensor<int32, [3]>([1, 8, 512])];
	tensor<fp16, [1, 8, 1, 512]> var_3908 = transpose(perm = var_3907, x = var_3902)[name = string("transpose_8")];
	tensor<fp16, [1, 8, 512]> x_145 = reshape(shape = var_3917, x = var_3908)[name = string("x_145")];
	int32 var_3923 = const()[name = string("op_3923"), val = int32(-1)];
	fp16 const_73_promoted = const()[name = string("const_73_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 512]> var_3925 = mul(x = x_145, y = const_73_promoted)[name = string("op_3925")];
	bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1024]> input_221 = concat(axis = var_3923, interleave = input_221_interleave_0, values = (x_145, var_3925))[name = string("input_221")];
	tensor<int32, [1]> normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3920_to_fp16 = const()[name = string("op_3920_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 1024]> normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_3920_to_fp16, x = input_221)[name = string("normed_221_cast_fp16")];
	tensor<int32, [2]> var_3930_split_sizes_0 = const()[name = string("op_3930_split_sizes_0"), val = tensor<int32, [2]>([512, 512])];
	int32 var_3930_axis_0 = const()[name = string("op_3930_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 512]> var_3930_0, tensor<fp16, [1, 8, 512]> var_3930_1 = split(axis = var_3930_axis_0, split_sizes = var_3930_split_sizes_0, x = normed_221_cast_fp16)[name = string("op_3930")];
	tensor<fp16, [1, 8, 512]> var_3932 = mul(x = var_3930_0, y = layers_4_self_attn_q_norm_weight)[name = string("op_3932")];
	tensor<int32, [4]> var_3937 = const()[name = string("op_3937"), val = tensor<int32, [4]>([1, 8, 1, 512])];
	tensor<fp16, [1, 8, 1, 512]> q_57 = reshape(shape = var_3937, x = var_3932)[name = string("q_57")];
	tensor<fp16, [1, 8, 1, 512]> var_3939_cast_fp16 = mul(x = q_57, y = cos_f)[name = string("op_3939_cast_fp16")];
	tensor<int32, [2]> var_3940_split_sizes_0 = const()[name = string("op_3940_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_3940_axis_0 = const()[name = string("op_3940_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 256]> var_3940_0, tensor<fp16, [1, 8, 1, 256]> var_3940_1 = split(axis = var_3940_axis_0, split_sizes = var_3940_split_sizes_0, x = q_57)[name = string("op_3940")];
	fp16 const_74_promoted = const()[name = string("const_74_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 256]> var_3942 = mul(x = var_3940_1, y = const_74_promoted)[name = string("op_3942")];
	int32 var_3944 = const()[name = string("op_3944"), val = int32(-1)];
	bool var_3945_interleave_0 = const()[name = string("op_3945_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> var_3945 = concat(axis = var_3944, interleave = var_3945_interleave_0, values = (var_3942, var_3940_0))[name = string("op_3945")];
	tensor<fp16, [1, 8, 1, 512]> var_3946_cast_fp16 = mul(x = var_3945, y = sin_f)[name = string("op_3946_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> q_cast_fp16 = add(x = var_3939_cast_fp16, y = var_3946_cast_fp16)[name = string("q_cast_fp16")];
	bool attn_weights_37_transpose_x_0 = const()[name = string("attn_weights_37_transpose_x_0"), val = bool(false)];
	bool attn_weights_37_transpose_y_0 = const()[name = string("attn_weights_37_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 2048]> attn_weights_37_cast_fp16 = matmul(transpose_x = attn_weights_37_transpose_x_0, transpose_y = attn_weights_37_transpose_y_0, x = q_cast_fp16, y = transpose_44_cast_fp16)[name = string("attn_weights_37_cast_fp16")];
	tensor<fp16, [1, 8, 1, 2048]> x_147_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask_full)[name = string("x_147_cast_fp16")];
	tensor<int32, [1]> reduce_max_9_axes_0 = const()[name = string("reduce_max_9_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_9_keep_dims_0 = const()[name = string("reduce_max_9_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> reduce_max_9 = reduce_max(axes = reduce_max_9_axes_0, keep_dims = reduce_max_9_keep_dims_0, x = x_147_cast_fp16)[name = string("reduce_max_9")];
	tensor<fp16, [1, 8, 1, 2048]> var_3978 = sub(x = x_147_cast_fp16, y = reduce_max_9)[name = string("op_3978")];
	tensor<fp16, [1, 8, 1, 2048]> var_3984 = exp(x = var_3978)[name = string("op_3984")];
	tensor<int32, [1]> var_3994_axes_0 = const()[name = string("op_3994_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_3994_keep_dims_0 = const()[name = string("op_3994_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 1, 1]> var_3994 = reduce_sum(axes = var_3994_axes_0, keep_dims = var_3994_keep_dims_0, x = var_3984)[name = string("op_3994")];
	tensor<fp16, [1, 8, 1, 2048]> var_4000_cast_fp16 = real_div(x = var_3984, y = var_3994)[name = string("op_4000_cast_fp16")];
	bool attn_output_55_transpose_x_0 = const()[name = string("attn_output_55_transpose_x_0"), val = bool(false)];
	bool attn_output_55_transpose_y_0 = const()[name = string("attn_output_55_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> attn_output_55_cast_fp16 = matmul(transpose_x = attn_output_55_transpose_x_0, transpose_y = attn_output_55_transpose_y_0, x = var_4000_cast_fp16, y = V_expanded_9_cast_fp16)[name = string("attn_output_55_cast_fp16")];
	tensor<int32, [4]> var_4011 = const()[name = string("op_4011"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_4018 = const()[name = string("op_4018"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 512]> var_4012_cast_fp16 = transpose(perm = var_4011, x = attn_output_55_cast_fp16)[name = string("transpose_7")];
	tensor<fp16, [1, 1, 4096]> attn_output_57_cast_fp16 = reshape(shape = var_4018, x = var_4012_cast_fp16)[name = string("attn_output_57_cast_fp16")];
	tensor<int32, [3]> var_4023 = const()[name = string("op_4023"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_4039_pad_type_0 = const()[name = string("op_4039_pad_type_0"), val = string("valid")];
	int32 var_4039_groups_0 = const()[name = string("op_4039_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_4039_strides_0 = const()[name = string("op_4039_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_4039_pad_0 = const()[name = string("op_4039_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_4039_dilations_0 = const()[name = string("op_4039_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 4096, 1]> squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 4096, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321927808))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325073600))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 4096, 1]> var_4024_cast_fp16 = transpose(perm = var_4023, x = attn_output_57_cast_fp16)[name = string("transpose_6")];
	tensor<fp16, [1, 1536, 1]> var_4039_cast_fp16 = conv(dilations = var_4039_dilations_0, groups = var_4039_groups_0, pad = var_4039_pad_0, pad_type = var_4039_pad_type_0, strides = var_4039_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_4024_cast_fp16)[name = string("op_4039_cast_fp16")];
	tensor<int32, [3]> var_4043 = const()[name = string("op_4043"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_4049 = const()[name = string("op_4049"), val = int32(-1)];
	fp16 const_75_promoted_to_fp16 = const()[name = string("const_75_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_151_cast_fp16 = transpose(perm = var_4043, x = var_4039_cast_fp16)[name = string("transpose_5")];
	tensor<fp16, [1, 1, 1536]> var_4051_cast_fp16 = mul(x = x_151_cast_fp16, y = const_75_promoted_to_fp16)[name = string("op_4051_cast_fp16")];
	bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_225_cast_fp16 = concat(axis = var_4049, interleave = input_225_interleave_0, values = (x_151_cast_fp16, var_4051_cast_fp16))[name = string("input_225_cast_fp16")];
	tensor<int32, [1]> normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4046_to_fp16 = const()[name = string("op_4046_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_4046_to_fp16, x = input_225_cast_fp16)[name = string("normed_225_cast_fp16")];
	tensor<int32, [2]> var_4056_split_sizes_0 = const()[name = string("op_4056_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4056_axis_0 = const()[name = string("op_4056_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4056_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4056_cast_fp16_1 = split(axis = var_4056_axis_0, split_sizes = var_4056_split_sizes_0, x = normed_225_cast_fp16)[name = string("op_4056_cast_fp16")];
	tensor<fp16, [1536]> layers_9_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325075200)))];
	tensor<fp16, [1, 1, 1536]> attn_output_cast_fp16 = mul(x = var_4056_cast_fp16_0, y = layers_9_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_153_cast_fp16 = add(x = x_143_cast_fp16, y = attn_output_cast_fp16)[name = string("x_153_cast_fp16")];
	int32 var_4065 = const()[name = string("op_4065"), val = int32(-1)];
	fp16 const_76_promoted_to_fp16 = const()[name = string("const_76_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_4067_cast_fp16 = mul(x = x_153_cast_fp16, y = const_76_promoted_to_fp16)[name = string("op_4067_cast_fp16")];
	bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_227_cast_fp16 = concat(axis = var_4065, interleave = input_227_interleave_0, values = (x_153_cast_fp16, var_4067_cast_fp16))[name = string("input_227_cast_fp16")];
	tensor<int32, [1]> normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4062_to_fp16 = const()[name = string("op_4062_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_4062_to_fp16, x = input_227_cast_fp16)[name = string("normed_229_cast_fp16")];
	tensor<int32, [2]> var_4072_split_sizes_0 = const()[name = string("op_4072_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4072_axis_0 = const()[name = string("op_4072_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4072_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4072_cast_fp16_1 = split(axis = var_4072_axis_0, split_sizes = var_4072_split_sizes_0, x = normed_229_cast_fp16)[name = string("op_4072_cast_fp16")];
	tensor<fp16, [1536]> layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325078336)))];
	tensor<fp16, [1, 1, 1536]> h_57_cast_fp16 = mul(x = var_4072_cast_fp16_0, y = layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_57_cast_fp16")];
	tensor<int32, [3]> var_4083 = const()[name = string("op_4083"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_229_axes_0 = const()[name = string("input_229_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_4084 = transpose(perm = var_4083, x = h_57_cast_fp16)[name = string("transpose_4")];
	tensor<fp16, [1, 1536, 1, 1]> input_229 = expand_dims(axes = input_229_axes_0, x = var_4084)[name = string("input_229")];
	string gate_37_pad_type_0 = const()[name = string("gate_37_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_37_strides_0 = const()[name = string("gate_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_37_pad_0 = const()[name = string("gate_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_37_dilations_0 = const()[name = string("gate_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_37_groups_0 = const()[name = string("gate_37_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> gate_37 = conv(dilations = gate_37_dilations_0, groups = gate_37_groups_0, pad = gate_37_pad_0, pad_type = gate_37_pad_type_0, strides = gate_37_strides_0, weight = layers_9_mlp_gate_proj_weight_palettized, x = input_229)[name = string("gate_37")];
	string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_strides_0 = const()[name = string("up_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_pad_0 = const()[name = string("up_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_dilations_0 = const()[name = string("up_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = layers_9_mlp_up_proj_weight_palettized, x = input_229)[name = string("up")];
	string gate_mode_0 = const()[name = string("gate_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> gate = gelu(mode = gate_mode_0, x = gate_37)[name = string("gate")];
	tensor<fp16, [1, 12288, 1, 1]> input_231 = mul(x = gate, y = up)[name = string("input_231")];
	string mlp_out_pad_type_0 = const()[name = string("mlp_out_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_strides_0 = const()[name = string("mlp_out_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_pad_0 = const()[name = string("mlp_out_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_dilations_0 = const()[name = string("mlp_out_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_groups_0 = const()[name = string("mlp_out_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> mlp_out = conv(dilations = mlp_out_dilations_0, groups = mlp_out_groups_0, pad = mlp_out_pad_0, pad_type = mlp_out_pad_type_0, strides = mlp_out_strides_0, weight = layers_9_mlp_down_proj_weight_palettized, x = input_231)[name = string("mlp_out")];
	tensor<int32, [1]> var_4124_axes_0 = const()[name = string("op_4124_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_4124 = squeeze(axes = var_4124_axes_0, x = mlp_out)[name = string("op_4124")];
	tensor<int32, [3]> var_4128 = const()[name = string("op_4128"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_4134 = const()[name = string("op_4134"), val = int32(-1)];
	fp16 const_77_promoted = const()[name = string("const_77_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_155 = transpose(perm = var_4128, x = var_4124)[name = string("transpose_3")];
	tensor<fp16, [1, 1, 1536]> var_4136 = mul(x = x_155, y = const_77_promoted)[name = string("op_4136")];
	bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_233 = concat(axis = var_4134, interleave = input_233_interleave_0, values = (x_155, var_4136))[name = string("input_233")];
	tensor<int32, [1]> normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4131_to_fp16 = const()[name = string("op_4131_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_4131_to_fp16, x = input_233)[name = string("normed_233_cast_fp16")];
	tensor<int32, [2]> var_4141_split_sizes_0 = const()[name = string("op_4141_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4141_axis_0 = const()[name = string("op_4141_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4141_0, tensor<fp16, [1, 1, 1536]> var_4141_1 = split(axis = var_4141_axis_0, split_sizes = var_4141_split_sizes_0, x = normed_233_cast_fp16)[name = string("op_4141")];
	tensor<fp16, [1, 1, 1536]> hidden_states_93 = mul(x = var_4141_0, y = layers_9_post_feedforward_layernorm_weight)[name = string("hidden_states_93")];
	tensor<fp16, [1, 1, 1536]> hidden_states_95_cast_fp16 = add(x = x_153_cast_fp16, y = hidden_states_93)[name = string("hidden_states_95_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_begin_0 = const()[name = string("per_layer_slice_begin_0"), val = tensor<int32, [3]>([0, 0, 6144])];
	tensor<int32, [3]> per_layer_slice_end_0 = const()[name = string("per_layer_slice_end_0"), val = tensor<int32, [3]>([1, 1, 6400])];
	tensor<bool, [3]> per_layer_slice_end_mask_0 = const()[name = string("per_layer_slice_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> per_layer_slice_cast_fp16 = slice_by_index(begin = per_layer_slice_begin_0, end = per_layer_slice_end_0, end_mask = per_layer_slice_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_cast_fp16")];
	tensor<int32, [3]> var_4169 = const()[name = string("op_4169"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_235_axes_0 = const()[name = string("input_235_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_4170 = transpose(perm = var_4169, x = hidden_states_95_cast_fp16)[name = string("transpose_2")];
	tensor<fp16, [1, 1536, 1, 1]> input_235 = expand_dims(axes = input_235_axes_0, x = var_4170)[name = string("input_235")];
	string gated_55_pad_type_0 = const()[name = string("gated_55_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_55_strides_0 = const()[name = string("gated_55_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_55_pad_0 = const()[name = string("gated_55_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_55_dilations_0 = const()[name = string("gated_55_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_55_groups_0 = const()[name = string("gated_55_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 1]> gated_55 = conv(dilations = gated_55_dilations_0, groups = gated_55_groups_0, pad = gated_55_pad_0, pad_type = gated_55_pad_type_0, strides = gated_55_strides_0, weight = layers_9_per_layer_input_gate_weight_palettized, x = input_235)[name = string("gated_55")];
	string gated_57_mode_0 = const()[name = string("gated_57_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 1]> gated_57 = gelu(mode = gated_57_mode_0, x = gated_55)[name = string("gated_57")];
	tensor<int32, [3]> var_4189 = const()[name = string("op_4189"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_axes_0 = const()[name = string("per_layer_slice_conv_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 1]> var_4190_cast_fp16 = transpose(perm = var_4189, x = per_layer_slice_cast_fp16)[name = string("transpose_1")];
	tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_cast_fp16 = expand_dims(axes = per_layer_slice_conv_axes_0, x = var_4190_cast_fp16)[name = string("per_layer_slice_conv_cast_fp16")];
	tensor<fp16, [1, 256, 1, 1]> input_237_cast_fp16 = mul(x = gated_57, y = per_layer_slice_conv_cast_fp16)[name = string("input_237_cast_fp16")];
	string gated_pad_type_0 = const()[name = string("gated_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_strides_0 = const()[name = string("gated_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_pad_0 = const()[name = string("gated_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_dilations_0 = const()[name = string("gated_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_groups_0 = const()[name = string("gated_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_9_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325081472))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325278144))))[name = string("layers_9_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 1]> gated_cast_fp16 = conv(dilations = gated_dilations_0, groups = gated_groups_0, pad = gated_pad_0, pad_type = gated_pad_type_0, strides = gated_strides_0, weight = layers_9_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_237_cast_fp16)[name = string("gated_cast_fp16")];
	tensor<int32, [1]> var_4206_axes_0 = const()[name = string("op_4206_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_4206_cast_fp16 = squeeze(axes = var_4206_axes_0, x = gated_cast_fp16)[name = string("op_4206_cast_fp16")];
	tensor<int32, [3]> var_4210 = const()[name = string("op_4210"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_4216 = const()[name = string("op_4216"), val = int32(-1)];
	fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_cast_fp16 = transpose(perm = var_4210, x = var_4206_cast_fp16)[name = string("transpose_0")];
	tensor<fp16, [1, 1, 1536]> var_4218_cast_fp16 = mul(x = x_cast_fp16, y = const_78_promoted_to_fp16)[name = string("op_4218_cast_fp16")];
	bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_cast_fp16 = concat(axis = var_4216, interleave = input_interleave_0, values = (x_cast_fp16, var_4218_cast_fp16))[name = string("input_cast_fp16")];
	tensor<int32, [1]> normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4213_to_fp16 = const()[name = string("op_4213_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_4213_to_fp16, x = input_cast_fp16)[name = string("normed_237_cast_fp16")];
	tensor<int32, [2]> var_4223_split_sizes_0 = const()[name = string("op_4223_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4223_axis_0 = const()[name = string("op_4223_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4223_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4223_cast_fp16_1 = split(axis = var_4223_axis_0, split_sizes = var_4223_split_sizes_0, x = normed_237_cast_fp16)[name = string("op_4223_cast_fp16")];
	tensor<fp16, [1536]> layers_9_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_9_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325279744)))];
	tensor<fp16, [1, 1, 1536]> hidden_states_99_cast_fp16 = mul(x = var_4223_cast_fp16_0, y = layers_9_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_99_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("hidden_states_cast_fp16")];
	tensor<fp16, [1]> const_79_promoted_to_fp16 = const()[name = string("const_79_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.cp-2])];
	tensor<fp16, [1, 1, 1536]> hidden_states_out = mul(x = hidden_states_cast_fp16, y = const_79_promoted_to_fp16)[name = string("op_4233_cast_fp16")];
	tensor<fp16, [1, 1, 2048, 1]> update_mask_tmp = identity(x = update_mask)[name = string("update_mask_tmp")];
	} -> (hidden_states_out);
	}