prefill: bump prefillN from 64 to 512 (~8x faster TTFT on 50-500 token prompts)

dfaa01b verified 8 days ago

341 kB

	program(1.3)
	[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})]
	{
	func main<ios18>(tensor<fp16, [1, 1, 512, 512]> causal_mask, tensor<fp16, [1, 1, 512, 512]> cos_f, tensor<fp16, [1, 1, 512, 256]> cos_s, tensor<fp16, [1, 512, 1536]> hidden_states, tensor<fp16, [1, 1, 512, 256]> kv13_k, tensor<fp16, [1, 1, 512, 256]> kv13_v, tensor<fp16, [1, 1, 512, 512]> kv14_k, tensor<fp16, [1, 1, 512, 512]> kv14_v, tensor<fp16, [1, 512, 1]> last_position_mask, tensor<fp16, [1, 512, 8960]> per_layer_combined, tensor<fp16, [1, 1, 512, 512]> sin_f, tensor<fp16, [1, 1, 512, 256]> sin_s) {
	tensor<fp16, [2048, 1536, 1, 1]> layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1572992))))[name = string("layers_0_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [256]> layers_0_self_attn_q_norm_weight = const()[name = string("layers_0_self_attn_q_norm_weight"), val = tensor<fp16, [256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1575104)))];
	tensor<fp16, [12288, 1536, 1, 1]> layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1575680))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11012928))))[name = string("layers_0_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11025280))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20462528))))[name = string("layers_0_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20474880))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29912128))))[name = string("layers_0_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_0_post_feedforward_layernorm_weight = const()[name = string("layers_0_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29913728)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_0_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29916864))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30113536))))[name = string("layers_0_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30113856))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31686784))))[name = string("layers_1_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31688896))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41126144))))[name = string("layers_1_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41138496))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50575744))))[name = string("layers_1_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50588096))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60025344))))[name = string("layers_1_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_1_post_feedforward_layernorm_weight = const()[name = string("layers_1_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60026944)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_1_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60030080))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60226752))))[name = string("layers_1_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60227072))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61800000))))[name = string("layers_2_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61802112))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71239360))))[name = string("layers_2_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71251712))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80688960))))[name = string("layers_2_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80701312))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90138560))))[name = string("layers_2_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_2_post_feedforward_layernorm_weight = const()[name = string("layers_2_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90140160)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_2_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90143296))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90339968))))[name = string("layers_2_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90340288))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91913216))))[name = string("layers_3_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91915328))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101352576))))[name = string("layers_3_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101364928))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110802176))))[name = string("layers_3_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110814528))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120251776))))[name = string("layers_3_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_3_post_feedforward_layernorm_weight = const()[name = string("layers_3_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120253376)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_3_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120256512))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120453184))))[name = string("layers_3_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [4096, 1536, 1, 1]> layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [4096, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120453504))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123599296))))[name = string("layers_4_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [512]> layers_4_self_attn_q_norm_weight = const()[name = string("layers_4_self_attn_q_norm_weight"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123603456)))];
	tensor<fp16, [12288, 1536, 1, 1]> layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123604544))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133041792))))[name = string("layers_4_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133054144))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142491392))))[name = string("layers_4_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142503744))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151940992))))[name = string("layers_4_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_4_post_feedforward_layernorm_weight = const()[name = string("layers_4_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151942592)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_4_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151945728))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152142400))))[name = string("layers_4_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152142720))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153715648))))[name = string("layers_5_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153717760))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163155008))))[name = string("layers_5_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163167360))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172604608))))[name = string("layers_5_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172616960))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182054208))))[name = string("layers_5_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_5_post_feedforward_layernorm_weight = const()[name = string("layers_5_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182055808)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_5_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182058944))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182255616))))[name = string("layers_5_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182255936))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183828864))))[name = string("layers_6_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183830976))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193268224))))[name = string("layers_6_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193280576))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202717824))))[name = string("layers_6_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202730176))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212167424))))[name = string("layers_6_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_6_post_feedforward_layernorm_weight = const()[name = string("layers_6_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212169024)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_6_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212172160))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212368832))))[name = string("layers_6_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212369152))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213942080))))[name = string("layers_7_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213944192))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223381440))))[name = string("layers_7_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223393792))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232831040))))[name = string("layers_7_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232843392))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242280640))))[name = string("layers_7_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_7_post_feedforward_layernorm_weight = const()[name = string("layers_7_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242282240)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_7_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242285376))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242482048))))[name = string("layers_7_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242482368))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244055296))))[name = string("layers_8_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244057408))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253494656))))[name = string("layers_8_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253507008))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262944256))))[name = string("layers_8_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262956608))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272393856))))[name = string("layers_8_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_8_post_feedforward_layernorm_weight = const()[name = string("layers_8_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272395456)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_8_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272398592))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272595264))))[name = string("layers_8_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [4096, 1536, 1, 1]> layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [4096, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272595584))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275741376))))[name = string("layers_9_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275745536))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285182784))))[name = string("layers_9_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285195136))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294632384))))[name = string("layers_9_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294644736))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304081984))))[name = string("layers_9_mlp_down_proj_weight_palettized")];
	tensor<fp16, [1536]> layers_9_post_feedforward_layernorm_weight = const()[name = string("layers_9_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304083584)))];
	tensor<fp16, [256, 1536, 1, 1]> layers_9_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304086720))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304283392))))[name = string("layers_9_per_layer_input_gate_weight_palettized")];
	int32 var_499 = const()[name = string("op_499"), val = int32(-1)];
	fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_501_cast_fp16 = mul(x = hidden_states, y = const_0_promoted_to_fp16)[name = string("op_501_cast_fp16")];
	bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_1_cast_fp16 = concat(axis = var_499, interleave = input_1_interleave_0, values = (hidden_states, var_501_cast_fp16))[name = string("input_1_cast_fp16")];
	tensor<int32, [1]> normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_496_to_fp16 = const()[name = string("op_496_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_496_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")];
	tensor<int32, [2]> var_506_split_sizes_0 = const()[name = string("op_506_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_506_axis_0 = const()[name = string("op_506_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_506_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_506_cast_fp16_1 = split(axis = var_506_axis_0, split_sizes = var_506_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_506_cast_fp16")];
	tensor<fp16, [1536]> layers_0_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304283712)))];
	tensor<fp16, [1, 512, 1536]> h_1_cast_fp16 = mul(x = var_506_cast_fp16_0, y = layers_0_input_layernorm_weight_promoted_to_fp16)[name = string("h_1_cast_fp16")];
	tensor<int32, [3]> var_512 = const()[name = string("op_512"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_515_axes_0 = const()[name = string("op_515_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_513_cast_fp16 = transpose(perm = var_512, x = h_1_cast_fp16)[name = string("transpose_123")];
	tensor<fp16, [1, 1536, 1, 512]> var_515_cast_fp16 = expand_dims(axes = var_515_axes_0, x = var_513_cast_fp16)[name = string("op_515_cast_fp16")];
	string q_raw_1_pad_type_0 = const()[name = string("q_raw_1_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> q_raw_1_strides_0 = const()[name = string("q_raw_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> q_raw_1_pad_0 = const()[name = string("q_raw_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> q_raw_1_dilations_0 = const()[name = string("q_raw_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 q_raw_1_groups_0 = const()[name = string("q_raw_1_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 512]> q_raw_1 = conv(dilations = q_raw_1_dilations_0, groups = q_raw_1_groups_0, pad = q_raw_1_pad_0, pad_type = q_raw_1_pad_type_0, strides = q_raw_1_strides_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = var_515_cast_fp16)[name = string("q_raw_1")];
	tensor<int32, [4]> var_536 = const()[name = string("op_536"), val = tensor<int32, [4]>([1, 8, 256, 512])];
	tensor<fp16, [1, 8, 256, 512]> var_537 = reshape(shape = var_536, x = q_raw_1)[name = string("op_537")];
	tensor<int32, [4]> transpose_40_perm_0 = const()[name = string("transpose_40_perm_0"), val = tensor<int32, [4]>([0, 3, 1, 2])];
	tensor<int32, [3]> var_560 = const()[name = string("op_560"), val = tensor<int32, [3]>([512, 8, 256])];
	tensor<fp16, [1, 512, 8, 256]> transpose_40 = transpose(perm = transpose_40_perm_0, x = var_537)[name = string("transpose_122")];
	tensor<fp16, [512, 8, 256]> x_1 = reshape(shape = var_560, x = transpose_40)[name = string("x_1")];
	int32 var_566 = const()[name = string("op_566"), val = int32(-1)];
	fp16 const_1_promoted = const()[name = string("const_1_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [512, 8, 256]> var_568 = mul(x = x_1, y = const_1_promoted)[name = string("op_568")];
	bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)];
	tensor<fp16, [512, 8, 512]> input_5 = concat(axis = var_566, interleave = input_5_interleave_0, values = (x_1, var_568))[name = string("input_5")];
	tensor<int32, [1]> normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_563_to_fp16 = const()[name = string("op_563_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [512, 8, 512]> normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_563_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")];
	tensor<int32, [2]> var_573_split_sizes_0 = const()[name = string("op_573_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_573_axis_0 = const()[name = string("op_573_axis_0"), val = int32(-1)];
	tensor<fp16, [512, 8, 256]> var_573_0, tensor<fp16, [512, 8, 256]> var_573_1 = split(axis = var_573_axis_0, split_sizes = var_573_split_sizes_0, x = normed_5_cast_fp16)[name = string("op_573")];
	tensor<fp16, [512, 8, 256]> q_3 = mul(x = var_573_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_3")];
	tensor<int32, [4]> var_580 = const()[name = string("op_580"), val = tensor<int32, [4]>([1, 512, 8, 256])];
	tensor<fp16, [1, 512, 8, 256]> var_581 = reshape(shape = var_580, x = q_3)[name = string("op_581")];
	tensor<int32, [4]> var_586 = const()[name = string("op_586"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<fp16, [1, 8, 512, 256]> q_5 = transpose(perm = var_586, x = var_581)[name = string("transpose_121")];
	tensor<fp16, [1, 8, 512, 256]> var_588_cast_fp16 = mul(x = q_5, y = cos_s)[name = string("op_588_cast_fp16")];
	tensor<int32, [2]> var_589_split_sizes_0 = const()[name = string("op_589_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_589_axis_0 = const()[name = string("op_589_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 512, 128]> var_589_0, tensor<fp16, [1, 8, 512, 128]> var_589_1 = split(axis = var_589_axis_0, split_sizes = var_589_split_sizes_0, x = q_5)[name = string("op_589")];
	fp16 const_2_promoted = const()[name = string("const_2_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 512, 128]> var_591 = mul(x = var_589_1, y = const_2_promoted)[name = string("op_591")];
	int32 var_593 = const()[name = string("op_593"), val = int32(-1)];
	bool var_594_interleave_0 = const()[name = string("op_594_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> var_594 = concat(axis = var_593, interleave = var_594_interleave_0, values = (var_591, var_589_0))[name = string("op_594")];
	tensor<fp16, [1, 8, 512, 256]> var_595_cast_fp16 = mul(x = var_594, y = sin_s)[name = string("op_595_cast_fp16")];
	tensor<fp16, [1, 8, 512, 256]> q_7_cast_fp16 = add(x = var_588_cast_fp16, y = var_595_cast_fp16)[name = string("q_7_cast_fp16")];
	tensor<int32, [4]> transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_0_reps_0 = const()[name = string("tile_0_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])];
	tensor<fp16, [1, 1, 512, 256]> transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = kv13_k)[name = string("transpose_120")];
	tensor<fp16, [8, 1, 512, 256]> tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = string("tile_0_cast_fp16")];
	tensor<int32, [5]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [5]>([8, 1, 1, 512, 256])];
	tensor<fp16, [8, 1, 1, 512, 256]> reshape_0_cast_fp16 = reshape(shape = concat_0, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")];
	tensor<int32, [5]> transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [4]>([-1, 1, 512, 256])];
	tensor<fp16, [1, 8, 1, 512, 256]> transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_119")];
	tensor<fp16, [8, 1, 512, 256]> reshape_1_cast_fp16 = reshape(shape = concat_1, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")];
	tensor<int32, [4]> transpose_41_perm_0 = const()[name = string("transpose_41_perm_0"), val = tensor<int32, [4]>([1, 0, -1, -2])];
	tensor<int32, [4]> transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])];
	tensor<fp16, [1, 1, 512, 256]> transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = kv13_v)[name = string("transpose_118")];
	tensor<fp16, [8, 1, 512, 256]> tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")];
	tensor<int32, [5]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [5]>([8, 1, 1, 512, 256])];
	tensor<fp16, [8, 1, 1, 512, 256]> reshape_2_cast_fp16 = reshape(shape = concat_2, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")];
	tensor<int32, [5]> transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [4]>([-1, 1, 512, 256])];
	tensor<fp16, [1, 8, 1, 512, 256]> transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_117")];
	tensor<fp16, [8, 1, 512, 256]> reshape_3_cast_fp16 = reshape(shape = concat_3, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")];
	tensor<int32, [4]> V_expanded_1_perm_0 = const()[name = string("V_expanded_1_perm_0"), val = tensor<int32, [4]>([1, 0, -2, -1])];
	bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)];
	bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 256, 512]> transpose_41_cast_fp16 = transpose(perm = transpose_41_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_116")];
	tensor<fp16, [1, 8, 512, 512]> attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = q_7_cast_fp16, y = transpose_41_cast_fp16)[name = string("attn_weights_1_cast_fp16")];
	tensor<fp16, [1, 8, 512, 512]> x_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_3_cast_fp16")];
	tensor<int32, [1]> reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> reduce_max_0 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_3_cast_fp16)[name = string("reduce_max_0")];
	tensor<fp16, [1, 8, 512, 512]> var_627 = sub(x = x_3_cast_fp16, y = reduce_max_0)[name = string("op_627")];
	tensor<fp16, [1, 8, 512, 512]> var_633 = exp(x = var_627)[name = string("op_633")];
	tensor<int32, [1]> var_643_axes_0 = const()[name = string("op_643_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_643_keep_dims_0 = const()[name = string("op_643_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> var_643 = reduce_sum(axes = var_643_axes_0, keep_dims = var_643_keep_dims_0, x = var_633)[name = string("op_643")];
	tensor<fp16, [1, 8, 512, 512]> var_649_cast_fp16 = real_div(x = var_633, y = var_643)[name = string("op_649_cast_fp16")];
	bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)];
	bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> V_expanded_1_cast_fp16 = transpose(perm = V_expanded_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_115")];
	tensor<fp16, [1, 8, 512, 256]> attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = var_649_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_1_cast_fp16")];
	tensor<int32, [4]> var_660 = const()[name = string("op_660"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_667 = const()[name = string("op_667"), val = tensor<int32, [3]>([1, 512, 2048])];
	tensor<fp16, [1, 512, 8, 256]> var_661_cast_fp16 = transpose(perm = var_660, x = attn_output_1_cast_fp16)[name = string("transpose_114")];
	tensor<fp16, [1, 512, 2048]> attn_output_3_cast_fp16 = reshape(shape = var_667, x = var_661_cast_fp16)[name = string("attn_output_3_cast_fp16")];
	tensor<int32, [3]> var_672 = const()[name = string("op_672"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_688_pad_type_0 = const()[name = string("op_688_pad_type_0"), val = string("valid")];
	int32 var_688_groups_0 = const()[name = string("op_688_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_688_strides_0 = const()[name = string("op_688_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_688_pad_0 = const()[name = string("op_688_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_688_dilations_0 = const()[name = string("op_688_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304286848))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305859776))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 2048, 512]> var_673_cast_fp16 = transpose(perm = var_672, x = attn_output_3_cast_fp16)[name = string("transpose_113")];
	tensor<fp16, [1, 1536, 512]> var_688_cast_fp16 = conv(dilations = var_688_dilations_0, groups = var_688_groups_0, pad = var_688_pad_0, pad_type = var_688_pad_type_0, strides = var_688_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_673_cast_fp16)[name = string("op_688_cast_fp16")];
	tensor<int32, [3]> var_692 = const()[name = string("op_692"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_698 = const()[name = string("op_698"), val = int32(-1)];
	fp16 const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_7_cast_fp16 = transpose(perm = var_692, x = var_688_cast_fp16)[name = string("transpose_112")];
	tensor<fp16, [1, 512, 1536]> var_700_cast_fp16 = mul(x = x_7_cast_fp16, y = const_3_promoted_to_fp16)[name = string("op_700_cast_fp16")];
	bool input_9_interleave_0 = const()[name = string("input_9_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_9_cast_fp16 = concat(axis = var_698, interleave = input_9_interleave_0, values = (x_7_cast_fp16, var_700_cast_fp16))[name = string("input_9_cast_fp16")];
	tensor<int32, [1]> normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_695_to_fp16 = const()[name = string("op_695_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_695_to_fp16, x = input_9_cast_fp16)[name = string("normed_9_cast_fp16")];
	tensor<int32, [2]> var_705_split_sizes_0 = const()[name = string("op_705_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_705_axis_0 = const()[name = string("op_705_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_705_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_705_cast_fp16_1 = split(axis = var_705_axis_0, split_sizes = var_705_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_705_cast_fp16")];
	tensor<fp16, [1536]> layers_0_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305861376)))];
	tensor<fp16, [1, 512, 1536]> attn_output_7_cast_fp16 = mul(x = var_705_cast_fp16_0, y = layers_0_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_7_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> x_9_cast_fp16 = add(x = hidden_states, y = attn_output_7_cast_fp16)[name = string("x_9_cast_fp16")];
	int32 var_714 = const()[name = string("op_714"), val = int32(-1)];
	fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_716_cast_fp16 = mul(x = x_9_cast_fp16, y = const_4_promoted_to_fp16)[name = string("op_716_cast_fp16")];
	bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_11_cast_fp16 = concat(axis = var_714, interleave = input_11_interleave_0, values = (x_9_cast_fp16, var_716_cast_fp16))[name = string("input_11_cast_fp16")];
	tensor<int32, [1]> normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_711_to_fp16 = const()[name = string("op_711_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_711_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")];
	tensor<int32, [2]> var_721_split_sizes_0 = const()[name = string("op_721_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_721_axis_0 = const()[name = string("op_721_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_721_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_721_cast_fp16_1 = split(axis = var_721_axis_0, split_sizes = var_721_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_721_cast_fp16")];
	tensor<fp16, [1536]> layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305864512)))];
	tensor<fp16, [1, 512, 1536]> h_3_cast_fp16 = mul(x = var_721_cast_fp16_0, y = layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_3_cast_fp16")];
	tensor<int32, [3]> var_732 = const()[name = string("op_732"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_733 = transpose(perm = var_732, x = h_3_cast_fp16)[name = string("transpose_111")];
	tensor<fp16, [1, 1536, 1, 512]> input_13 = expand_dims(axes = input_13_axes_0, x = var_733)[name = string("input_13")];
	string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_13)[name = string("gate_1")];
	string up_1_pad_type_0 = const()[name = string("up_1_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_1_dilations_0 = const()[name = string("up_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> up_1 = conv(dilations = up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = up_1_strides_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_13)[name = string("up_1")];
	string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 512]> gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")];
	tensor<fp16, [1, 12288, 1, 512]> input_15 = mul(x = gate_3, y = up_1)[name = string("input_15")];
	string mlp_out_1_pad_type_0 = const()[name = string("mlp_out_1_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_1_strides_0 = const()[name = string("mlp_out_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_1_pad_0 = const()[name = string("mlp_out_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_1_dilations_0 = const()[name = string("mlp_out_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_1_groups_0 = const()[name = string("mlp_out_1_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 512]> mlp_out_1 = conv(dilations = mlp_out_1_dilations_0, groups = mlp_out_1_groups_0, pad = mlp_out_1_pad_0, pad_type = mlp_out_1_pad_type_0, strides = mlp_out_1_strides_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_15)[name = string("mlp_out_1")];
	tensor<int32, [1]> var_773_axes_0 = const()[name = string("op_773_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_773 = squeeze(axes = var_773_axes_0, x = mlp_out_1)[name = string("op_773")];
	tensor<int32, [3]> var_777 = const()[name = string("op_777"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_783 = const()[name = string("op_783"), val = int32(-1)];
	fp16 const_5_promoted = const()[name = string("const_5_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_11 = transpose(perm = var_777, x = var_773)[name = string("transpose_110")];
	tensor<fp16, [1, 512, 1536]> var_785 = mul(x = x_11, y = const_5_promoted)[name = string("op_785")];
	bool input_17_interleave_0 = const()[name = string("input_17_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_17 = concat(axis = var_783, interleave = input_17_interleave_0, values = (x_11, var_785))[name = string("input_17")];
	tensor<int32, [1]> normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_780_to_fp16 = const()[name = string("op_780_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_780_to_fp16, x = input_17)[name = string("normed_17_cast_fp16")];
	tensor<int32, [2]> var_790_split_sizes_0 = const()[name = string("op_790_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_790_axis_0 = const()[name = string("op_790_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_790_0, tensor<fp16, [1, 512, 1536]> var_790_1 = split(axis = var_790_axis_0, split_sizes = var_790_split_sizes_0, x = normed_17_cast_fp16)[name = string("op_790")];
	tensor<fp16, [1, 512, 1536]> hidden_states_mlp_1 = mul(x = var_790_0, y = layers_0_post_feedforward_layernorm_weight)[name = string("hidden_states_mlp_1")];
	tensor<fp16, [1, 512, 1536]> hidden_states_3_cast_fp16 = add(x = x_9_cast_fp16, y = hidden_states_mlp_1)[name = string("hidden_states_3_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_1_begin_0 = const()[name = string("per_layer_slice_1_begin_0"), val = tensor<int32, [3]>([0, 0, 6400])];
	tensor<int32, [3]> per_layer_slice_1_end_0 = const()[name = string("per_layer_slice_1_end_0"), val = tensor<int32, [3]>([1, 512, 6656])];
	tensor<bool, [3]> per_layer_slice_1_end_mask_0 = const()[name = string("per_layer_slice_1_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 512, 256]> per_layer_slice_1_cast_fp16 = slice_by_index(begin = per_layer_slice_1_begin_0, end = per_layer_slice_1_end_0, end_mask = per_layer_slice_1_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_1_cast_fp16")];
	tensor<int32, [3]> var_818 = const()[name = string("op_818"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_19_axes_0 = const()[name = string("input_19_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_819 = transpose(perm = var_818, x = hidden_states_3_cast_fp16)[name = string("transpose_109")];
	tensor<fp16, [1, 1536, 1, 512]> input_19 = expand_dims(axes = input_19_axes_0, x = var_819)[name = string("input_19")];
	string gated_1_pad_type_0 = const()[name = string("gated_1_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_1_strides_0 = const()[name = string("gated_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_1_pad_0 = const()[name = string("gated_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_1_dilations_0 = const()[name = string("gated_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_1_groups_0 = const()[name = string("gated_1_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 512]> gated_1 = conv(dilations = gated_1_dilations_0, groups = gated_1_groups_0, pad = gated_1_pad_0, pad_type = gated_1_pad_type_0, strides = gated_1_strides_0, weight = layers_0_per_layer_input_gate_weight_palettized, x = input_19)[name = string("gated_1")];
	string gated_3_mode_0 = const()[name = string("gated_3_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 512]> gated_3 = gelu(mode = gated_3_mode_0, x = gated_1)[name = string("gated_3")];
	tensor<int32, [3]> var_838 = const()[name = string("op_838"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_1_axes_0 = const()[name = string("per_layer_slice_conv_1_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 512]> var_839_cast_fp16 = transpose(perm = var_838, x = per_layer_slice_1_cast_fp16)[name = string("transpose_108")];
	tensor<fp16, [1, 256, 1, 512]> per_layer_slice_conv_1_cast_fp16 = expand_dims(axes = per_layer_slice_conv_1_axes_0, x = var_839_cast_fp16)[name = string("per_layer_slice_conv_1_cast_fp16")];
	tensor<fp16, [1, 256, 1, 512]> input_21_cast_fp16 = mul(x = gated_3, y = per_layer_slice_conv_1_cast_fp16)[name = string("input_21_cast_fp16")];
	string gated_5_pad_type_0 = const()[name = string("gated_5_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_5_strides_0 = const()[name = string("gated_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_5_pad_0 = const()[name = string("gated_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_5_dilations_0 = const()[name = string("gated_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_5_groups_0 = const()[name = string("gated_5_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_0_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305867648))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306064320))))[name = string("layers_0_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 512]> gated_5_cast_fp16 = conv(dilations = gated_5_dilations_0, groups = gated_5_groups_0, pad = gated_5_pad_0, pad_type = gated_5_pad_type_0, strides = gated_5_strides_0, weight = layers_0_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_21_cast_fp16)[name = string("gated_5_cast_fp16")];
	tensor<int32, [1]> var_855_axes_0 = const()[name = string("op_855_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_855_cast_fp16 = squeeze(axes = var_855_axes_0, x = gated_5_cast_fp16)[name = string("op_855_cast_fp16")];
	tensor<int32, [3]> var_859 = const()[name = string("op_859"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_865 = const()[name = string("op_865"), val = int32(-1)];
	fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_13_cast_fp16 = transpose(perm = var_859, x = var_855_cast_fp16)[name = string("transpose_107")];
	tensor<fp16, [1, 512, 1536]> var_867_cast_fp16 = mul(x = x_13_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_867_cast_fp16")];
	bool input_23_interleave_0 = const()[name = string("input_23_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_23_cast_fp16 = concat(axis = var_865, interleave = input_23_interleave_0, values = (x_13_cast_fp16, var_867_cast_fp16))[name = string("input_23_cast_fp16")];
	tensor<int32, [1]> normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_862_to_fp16 = const()[name = string("op_862_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_862_to_fp16, x = input_23_cast_fp16)[name = string("normed_21_cast_fp16")];
	tensor<int32, [2]> var_872_split_sizes_0 = const()[name = string("op_872_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_872_axis_0 = const()[name = string("op_872_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_872_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_872_cast_fp16_1 = split(axis = var_872_axis_0, split_sizes = var_872_split_sizes_0, x = normed_21_cast_fp16)[name = string("op_872_cast_fp16")];
	tensor<fp16, [1536]> layers_0_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306065920)))];
	tensor<fp16, [1, 512, 1536]> hidden_states_7_cast_fp16 = mul(x = var_872_cast_fp16_0, y = layers_0_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_7_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> hidden_states_9_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = hidden_states_7_cast_fp16)[name = string("hidden_states_9_cast_fp16")];
	tensor<fp16, [1]> const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.92p-1])];
	tensor<fp16, [1, 512, 1536]> x_15_cast_fp16 = mul(x = hidden_states_9_cast_fp16, y = const_7_promoted_to_fp16)[name = string("x_15_cast_fp16")];
	int32 var_887 = const()[name = string("op_887"), val = int32(-1)];
	fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_889_cast_fp16 = mul(x = x_15_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_889_cast_fp16")];
	bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_25_cast_fp16 = concat(axis = var_887, interleave = input_25_interleave_0, values = (x_15_cast_fp16, var_889_cast_fp16))[name = string("input_25_cast_fp16")];
	tensor<int32, [1]> normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_884_to_fp16 = const()[name = string("op_884_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_884_to_fp16, x = input_25_cast_fp16)[name = string("normed_25_cast_fp16")];
	tensor<int32, [2]> var_894_split_sizes_0 = const()[name = string("op_894_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_894_axis_0 = const()[name = string("op_894_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_894_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_894_cast_fp16_1 = split(axis = var_894_axis_0, split_sizes = var_894_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_894_cast_fp16")];
	tensor<fp16, [1536]> layers_1_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306069056)))];
	tensor<fp16, [1, 512, 1536]> h_7_cast_fp16 = mul(x = var_894_cast_fp16_0, y = layers_1_input_layernorm_weight_promoted_to_fp16)[name = string("h_7_cast_fp16")];
	tensor<int32, [3]> var_900 = const()[name = string("op_900"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_903_axes_0 = const()[name = string("op_903_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_901_cast_fp16 = transpose(perm = var_900, x = h_7_cast_fp16)[name = string("transpose_106")];
	tensor<fp16, [1, 1536, 1, 512]> var_903_cast_fp16 = expand_dims(axes = var_903_axes_0, x = var_901_cast_fp16)[name = string("op_903_cast_fp16")];
	string q_raw_3_pad_type_0 = const()[name = string("q_raw_3_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> q_raw_3_strides_0 = const()[name = string("q_raw_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> q_raw_3_pad_0 = const()[name = string("q_raw_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> q_raw_3_dilations_0 = const()[name = string("q_raw_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 q_raw_3_groups_0 = const()[name = string("q_raw_3_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 512]> q_raw_3 = conv(dilations = q_raw_3_dilations_0, groups = q_raw_3_groups_0, pad = q_raw_3_pad_0, pad_type = q_raw_3_pad_type_0, strides = q_raw_3_strides_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = var_903_cast_fp16)[name = string("q_raw_3")];
	tensor<int32, [4]> var_924 = const()[name = string("op_924"), val = tensor<int32, [4]>([1, 8, 256, 512])];
	tensor<fp16, [1, 8, 256, 512]> var_925 = reshape(shape = var_924, x = q_raw_3)[name = string("op_925")];
	tensor<int32, [4]> transpose_42_perm_0 = const()[name = string("transpose_42_perm_0"), val = tensor<int32, [4]>([0, 3, 1, 2])];
	tensor<int32, [3]> var_948 = const()[name = string("op_948"), val = tensor<int32, [3]>([512, 8, 256])];
	tensor<fp16, [1, 512, 8, 256]> transpose_42 = transpose(perm = transpose_42_perm_0, x = var_925)[name = string("transpose_105")];
	tensor<fp16, [512, 8, 256]> x_17 = reshape(shape = var_948, x = transpose_42)[name = string("x_17")];
	int32 var_954 = const()[name = string("op_954"), val = int32(-1)];
	fp16 const_9_promoted = const()[name = string("const_9_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [512, 8, 256]> var_956 = mul(x = x_17, y = const_9_promoted)[name = string("op_956")];
	bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)];
	tensor<fp16, [512, 8, 512]> input_29 = concat(axis = var_954, interleave = input_29_interleave_0, values = (x_17, var_956))[name = string("input_29")];
	tensor<int32, [1]> normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_951_to_fp16 = const()[name = string("op_951_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [512, 8, 512]> normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_951_to_fp16, x = input_29)[name = string("normed_29_cast_fp16")];
	tensor<int32, [2]> var_961_split_sizes_0 = const()[name = string("op_961_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_961_axis_0 = const()[name = string("op_961_axis_0"), val = int32(-1)];
	tensor<fp16, [512, 8, 256]> var_961_0, tensor<fp16, [512, 8, 256]> var_961_1 = split(axis = var_961_axis_0, split_sizes = var_961_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_961")];
	tensor<fp16, [512, 8, 256]> q_11 = mul(x = var_961_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_11")];
	tensor<int32, [4]> var_968 = const()[name = string("op_968"), val = tensor<int32, [4]>([1, 512, 8, 256])];
	tensor<fp16, [1, 512, 8, 256]> var_969 = reshape(shape = var_968, x = q_11)[name = string("op_969")];
	tensor<int32, [4]> var_974 = const()[name = string("op_974"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<fp16, [1, 8, 512, 256]> q_13 = transpose(perm = var_974, x = var_969)[name = string("transpose_104")];
	tensor<fp16, [1, 8, 512, 256]> var_976_cast_fp16 = mul(x = q_13, y = cos_s)[name = string("op_976_cast_fp16")];
	tensor<int32, [2]> var_977_split_sizes_0 = const()[name = string("op_977_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_977_axis_0 = const()[name = string("op_977_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 512, 128]> var_977_0, tensor<fp16, [1, 8, 512, 128]> var_977_1 = split(axis = var_977_axis_0, split_sizes = var_977_split_sizes_0, x = q_13)[name = string("op_977")];
	fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 512, 128]> var_979 = mul(x = var_977_1, y = const_10_promoted)[name = string("op_979")];
	int32 var_981 = const()[name = string("op_981"), val = int32(-1)];
	bool var_982_interleave_0 = const()[name = string("op_982_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> var_982 = concat(axis = var_981, interleave = var_982_interleave_0, values = (var_979, var_977_0))[name = string("op_982")];
	tensor<fp16, [1, 8, 512, 256]> var_983_cast_fp16 = mul(x = var_982, y = sin_s)[name = string("op_983_cast_fp16")];
	tensor<fp16, [1, 8, 512, 256]> q_15_cast_fp16 = add(x = var_976_cast_fp16, y = var_983_cast_fp16)[name = string("q_15_cast_fp16")];
	bool attn_weights_5_transpose_x_0 = const()[name = string("attn_weights_5_transpose_x_0"), val = bool(false)];
	bool attn_weights_5_transpose_y_0 = const()[name = string("attn_weights_5_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 512]> attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = q_15_cast_fp16, y = transpose_41_cast_fp16)[name = string("attn_weights_5_cast_fp16")];
	tensor<fp16, [1, 8, 512, 512]> x_19_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_19_cast_fp16")];
	tensor<int32, [1]> reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> reduce_max_1 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_19_cast_fp16)[name = string("reduce_max_1")];
	tensor<fp16, [1, 8, 512, 512]> var_1015 = sub(x = x_19_cast_fp16, y = reduce_max_1)[name = string("op_1015")];
	tensor<fp16, [1, 8, 512, 512]> var_1021 = exp(x = var_1015)[name = string("op_1021")];
	tensor<int32, [1]> var_1031_axes_0 = const()[name = string("op_1031_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_1031_keep_dims_0 = const()[name = string("op_1031_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> var_1031 = reduce_sum(axes = var_1031_axes_0, keep_dims = var_1031_keep_dims_0, x = var_1021)[name = string("op_1031")];
	tensor<fp16, [1, 8, 512, 512]> var_1037_cast_fp16 = real_div(x = var_1021, y = var_1031)[name = string("op_1037_cast_fp16")];
	bool attn_output_9_transpose_x_0 = const()[name = string("attn_output_9_transpose_x_0"), val = bool(false)];
	bool attn_output_9_transpose_y_0 = const()[name = string("attn_output_9_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> attn_output_9_cast_fp16 = matmul(transpose_x = attn_output_9_transpose_x_0, transpose_y = attn_output_9_transpose_y_0, x = var_1037_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_9_cast_fp16")];
	tensor<int32, [4]> var_1048 = const()[name = string("op_1048"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_1055 = const()[name = string("op_1055"), val = tensor<int32, [3]>([1, 512, 2048])];
	tensor<fp16, [1, 512, 8, 256]> var_1049_cast_fp16 = transpose(perm = var_1048, x = attn_output_9_cast_fp16)[name = string("transpose_103")];
	tensor<fp16, [1, 512, 2048]> attn_output_11_cast_fp16 = reshape(shape = var_1055, x = var_1049_cast_fp16)[name = string("attn_output_11_cast_fp16")];
	tensor<int32, [3]> var_1060 = const()[name = string("op_1060"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_1076_pad_type_0 = const()[name = string("op_1076_pad_type_0"), val = string("valid")];
	int32 var_1076_groups_0 = const()[name = string("op_1076_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_1076_strides_0 = const()[name = string("op_1076_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_1076_pad_0 = const()[name = string("op_1076_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_1076_dilations_0 = const()[name = string("op_1076_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306072192))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307645120))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 2048, 512]> var_1061_cast_fp16 = transpose(perm = var_1060, x = attn_output_11_cast_fp16)[name = string("transpose_102")];
	tensor<fp16, [1, 1536, 512]> var_1076_cast_fp16 = conv(dilations = var_1076_dilations_0, groups = var_1076_groups_0, pad = var_1076_pad_0, pad_type = var_1076_pad_type_0, strides = var_1076_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_1061_cast_fp16)[name = string("op_1076_cast_fp16")];
	tensor<int32, [3]> var_1080 = const()[name = string("op_1080"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1086 = const()[name = string("op_1086"), val = int32(-1)];
	fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_23_cast_fp16 = transpose(perm = var_1080, x = var_1076_cast_fp16)[name = string("transpose_101")];
	tensor<fp16, [1, 512, 1536]> var_1088_cast_fp16 = mul(x = x_23_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_1088_cast_fp16")];
	bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_33_cast_fp16 = concat(axis = var_1086, interleave = input_33_interleave_0, values = (x_23_cast_fp16, var_1088_cast_fp16))[name = string("input_33_cast_fp16")];
	tensor<int32, [1]> normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1083_to_fp16 = const()[name = string("op_1083_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_1083_to_fp16, x = input_33_cast_fp16)[name = string("normed_33_cast_fp16")];
	tensor<int32, [2]> var_1093_split_sizes_0 = const()[name = string("op_1093_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1093_axis_0 = const()[name = string("op_1093_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_1093_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_1093_cast_fp16_1 = split(axis = var_1093_axis_0, split_sizes = var_1093_split_sizes_0, x = normed_33_cast_fp16)[name = string("op_1093_cast_fp16")];
	tensor<fp16, [1536]> layers_1_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307646720)))];
	tensor<fp16, [1, 512, 1536]> attn_output_15_cast_fp16 = mul(x = var_1093_cast_fp16_0, y = layers_1_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_15_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> x_25_cast_fp16 = add(x = x_15_cast_fp16, y = attn_output_15_cast_fp16)[name = string("x_25_cast_fp16")];
	int32 var_1102 = const()[name = string("op_1102"), val = int32(-1)];
	fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_1104_cast_fp16 = mul(x = x_25_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_1104_cast_fp16")];
	bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_35_cast_fp16 = concat(axis = var_1102, interleave = input_35_interleave_0, values = (x_25_cast_fp16, var_1104_cast_fp16))[name = string("input_35_cast_fp16")];
	tensor<int32, [1]> normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1099_to_fp16 = const()[name = string("op_1099_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_1099_to_fp16, x = input_35_cast_fp16)[name = string("normed_37_cast_fp16")];
	tensor<int32, [2]> var_1109_split_sizes_0 = const()[name = string("op_1109_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1109_axis_0 = const()[name = string("op_1109_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_1109_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_1109_cast_fp16_1 = split(axis = var_1109_axis_0, split_sizes = var_1109_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_1109_cast_fp16")];
	tensor<fp16, [1536]> layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307649856)))];
	tensor<fp16, [1, 512, 1536]> h_9_cast_fp16 = mul(x = var_1109_cast_fp16_0, y = layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_9_cast_fp16")];
	tensor<int32, [3]> var_1120 = const()[name = string("op_1120"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_1121 = transpose(perm = var_1120, x = h_9_cast_fp16)[name = string("transpose_100")];
	tensor<fp16, [1, 1536, 1, 512]> input_37 = expand_dims(axes = input_37_axes_0, x = var_1121)[name = string("input_37")];
	string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_5_pad_0 = const()[name = string("gate_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_5_dilations_0 = const()[name = string("gate_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> gate_5 = conv(dilations = gate_5_dilations_0, groups = gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_37)[name = string("gate_5")];
	string up_3_pad_type_0 = const()[name = string("up_3_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_3")];
	string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 512]> gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")];
	tensor<fp16, [1, 12288, 1, 512]> input_39 = mul(x = gate_7, y = up_3)[name = string("input_39")];
	string mlp_out_3_pad_type_0 = const()[name = string("mlp_out_3_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_3_strides_0 = const()[name = string("mlp_out_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_3_pad_0 = const()[name = string("mlp_out_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_3_dilations_0 = const()[name = string("mlp_out_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_3_groups_0 = const()[name = string("mlp_out_3_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 512]> mlp_out_3 = conv(dilations = mlp_out_3_dilations_0, groups = mlp_out_3_groups_0, pad = mlp_out_3_pad_0, pad_type = mlp_out_3_pad_type_0, strides = mlp_out_3_strides_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_39)[name = string("mlp_out_3")];
	tensor<int32, [1]> var_1161_axes_0 = const()[name = string("op_1161_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_1161 = squeeze(axes = var_1161_axes_0, x = mlp_out_3)[name = string("op_1161")];
	tensor<int32, [3]> var_1165 = const()[name = string("op_1165"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1171 = const()[name = string("op_1171"), val = int32(-1)];
	fp16 const_13_promoted = const()[name = string("const_13_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_27 = transpose(perm = var_1165, x = var_1161)[name = string("transpose_99")];
	tensor<fp16, [1, 512, 1536]> var_1173 = mul(x = x_27, y = const_13_promoted)[name = string("op_1173")];
	bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_41 = concat(axis = var_1171, interleave = input_41_interleave_0, values = (x_27, var_1173))[name = string("input_41")];
	tensor<int32, [1]> normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1168_to_fp16 = const()[name = string("op_1168_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_1168_to_fp16, x = input_41)[name = string("normed_41_cast_fp16")];
	tensor<int32, [2]> var_1178_split_sizes_0 = const()[name = string("op_1178_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1178_axis_0 = const()[name = string("op_1178_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_1178_0, tensor<fp16, [1, 512, 1536]> var_1178_1 = split(axis = var_1178_axis_0, split_sizes = var_1178_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_1178")];
	tensor<fp16, [1, 512, 1536]> hidden_states_mlp_3 = mul(x = var_1178_0, y = layers_1_post_feedforward_layernorm_weight)[name = string("hidden_states_mlp_3")];
	tensor<fp16, [1, 512, 1536]> hidden_states_11_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_mlp_3)[name = string("hidden_states_11_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_3_begin_0 = const()[name = string("per_layer_slice_3_begin_0"), val = tensor<int32, [3]>([0, 0, 6656])];
	tensor<int32, [3]> per_layer_slice_3_end_0 = const()[name = string("per_layer_slice_3_end_0"), val = tensor<int32, [3]>([1, 512, 6912])];
	tensor<bool, [3]> per_layer_slice_3_end_mask_0 = const()[name = string("per_layer_slice_3_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 512, 256]> per_layer_slice_3_cast_fp16 = slice_by_index(begin = per_layer_slice_3_begin_0, end = per_layer_slice_3_end_0, end_mask = per_layer_slice_3_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_3_cast_fp16")];
	tensor<int32, [3]> var_1206 = const()[name = string("op_1206"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_43_axes_0 = const()[name = string("input_43_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_1207 = transpose(perm = var_1206, x = hidden_states_11_cast_fp16)[name = string("transpose_98")];
	tensor<fp16, [1, 1536, 1, 512]> input_43 = expand_dims(axes = input_43_axes_0, x = var_1207)[name = string("input_43")];
	string gated_7_pad_type_0 = const()[name = string("gated_7_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_7_strides_0 = const()[name = string("gated_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_7_pad_0 = const()[name = string("gated_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_7_dilations_0 = const()[name = string("gated_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_7_groups_0 = const()[name = string("gated_7_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 512]> gated_7 = conv(dilations = gated_7_dilations_0, groups = gated_7_groups_0, pad = gated_7_pad_0, pad_type = gated_7_pad_type_0, strides = gated_7_strides_0, weight = layers_1_per_layer_input_gate_weight_palettized, x = input_43)[name = string("gated_7")];
	string gated_9_mode_0 = const()[name = string("gated_9_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 512]> gated_9 = gelu(mode = gated_9_mode_0, x = gated_7)[name = string("gated_9")];
	tensor<int32, [3]> var_1226 = const()[name = string("op_1226"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_3_axes_0 = const()[name = string("per_layer_slice_conv_3_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 512]> var_1227_cast_fp16 = transpose(perm = var_1226, x = per_layer_slice_3_cast_fp16)[name = string("transpose_97")];
	tensor<fp16, [1, 256, 1, 512]> per_layer_slice_conv_3_cast_fp16 = expand_dims(axes = per_layer_slice_conv_3_axes_0, x = var_1227_cast_fp16)[name = string("per_layer_slice_conv_3_cast_fp16")];
	tensor<fp16, [1, 256, 1, 512]> input_45_cast_fp16 = mul(x = gated_9, y = per_layer_slice_conv_3_cast_fp16)[name = string("input_45_cast_fp16")];
	string gated_11_pad_type_0 = const()[name = string("gated_11_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_11_strides_0 = const()[name = string("gated_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_11_pad_0 = const()[name = string("gated_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_11_dilations_0 = const()[name = string("gated_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_11_groups_0 = const()[name = string("gated_11_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_1_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307652992))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307849664))))[name = string("layers_1_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 512]> gated_11_cast_fp16 = conv(dilations = gated_11_dilations_0, groups = gated_11_groups_0, pad = gated_11_pad_0, pad_type = gated_11_pad_type_0, strides = gated_11_strides_0, weight = layers_1_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_45_cast_fp16)[name = string("gated_11_cast_fp16")];
	tensor<int32, [1]> var_1243_axes_0 = const()[name = string("op_1243_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_1243_cast_fp16 = squeeze(axes = var_1243_axes_0, x = gated_11_cast_fp16)[name = string("op_1243_cast_fp16")];
	tensor<int32, [3]> var_1247 = const()[name = string("op_1247"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1253 = const()[name = string("op_1253"), val = int32(-1)];
	fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_29_cast_fp16 = transpose(perm = var_1247, x = var_1243_cast_fp16)[name = string("transpose_96")];
	tensor<fp16, [1, 512, 1536]> var_1255_cast_fp16 = mul(x = x_29_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1255_cast_fp16")];
	bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_47_cast_fp16 = concat(axis = var_1253, interleave = input_47_interleave_0, values = (x_29_cast_fp16, var_1255_cast_fp16))[name = string("input_47_cast_fp16")];
	tensor<int32, [1]> normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1250_to_fp16 = const()[name = string("op_1250_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_1250_to_fp16, x = input_47_cast_fp16)[name = string("normed_45_cast_fp16")];
	tensor<int32, [2]> var_1260_split_sizes_0 = const()[name = string("op_1260_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1260_axis_0 = const()[name = string("op_1260_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_1260_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_1260_cast_fp16_1 = split(axis = var_1260_axis_0, split_sizes = var_1260_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_1260_cast_fp16")];
	tensor<fp16, [1536]> layers_1_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307851264)))];
	tensor<fp16, [1, 512, 1536]> hidden_states_15_cast_fp16 = mul(x = var_1260_cast_fp16_0, y = layers_1_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_15_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> hidden_states_17_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("hidden_states_17_cast_fp16")];
	tensor<fp16, [1]> const_15_promoted_to_fp16 = const()[name = string("const_15_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.a6p-1])];
	tensor<fp16, [1, 512, 1536]> x_31_cast_fp16 = mul(x = hidden_states_17_cast_fp16, y = const_15_promoted_to_fp16)[name = string("x_31_cast_fp16")];
	int32 var_1275 = const()[name = string("op_1275"), val = int32(-1)];
	fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_1277_cast_fp16 = mul(x = x_31_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1277_cast_fp16")];
	bool input_49_interleave_0 = const()[name = string("input_49_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_49_cast_fp16 = concat(axis = var_1275, interleave = input_49_interleave_0, values = (x_31_cast_fp16, var_1277_cast_fp16))[name = string("input_49_cast_fp16")];
	tensor<int32, [1]> normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1272_to_fp16 = const()[name = string("op_1272_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_1272_to_fp16, x = input_49_cast_fp16)[name = string("normed_49_cast_fp16")];
	tensor<int32, [2]> var_1282_split_sizes_0 = const()[name = string("op_1282_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1282_axis_0 = const()[name = string("op_1282_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_1282_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_1282_cast_fp16_1 = split(axis = var_1282_axis_0, split_sizes = var_1282_split_sizes_0, x = normed_49_cast_fp16)[name = string("op_1282_cast_fp16")];
	tensor<fp16, [1536]> layers_2_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307854400)))];
	tensor<fp16, [1, 512, 1536]> h_13_cast_fp16 = mul(x = var_1282_cast_fp16_0, y = layers_2_input_layernorm_weight_promoted_to_fp16)[name = string("h_13_cast_fp16")];
	tensor<int32, [3]> var_1288 = const()[name = string("op_1288"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_1291_axes_0 = const()[name = string("op_1291_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_1289_cast_fp16 = transpose(perm = var_1288, x = h_13_cast_fp16)[name = string("transpose_95")];
	tensor<fp16, [1, 1536, 1, 512]> var_1291_cast_fp16 = expand_dims(axes = var_1291_axes_0, x = var_1289_cast_fp16)[name = string("op_1291_cast_fp16")];
	string q_raw_5_pad_type_0 = const()[name = string("q_raw_5_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> q_raw_5_strides_0 = const()[name = string("q_raw_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> q_raw_5_pad_0 = const()[name = string("q_raw_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> q_raw_5_dilations_0 = const()[name = string("q_raw_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 q_raw_5_groups_0 = const()[name = string("q_raw_5_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 512]> q_raw_5 = conv(dilations = q_raw_5_dilations_0, groups = q_raw_5_groups_0, pad = q_raw_5_pad_0, pad_type = q_raw_5_pad_type_0, strides = q_raw_5_strides_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = var_1291_cast_fp16)[name = string("q_raw_5")];
	tensor<int32, [4]> var_1312 = const()[name = string("op_1312"), val = tensor<int32, [4]>([1, 8, 256, 512])];
	tensor<fp16, [1, 8, 256, 512]> var_1313 = reshape(shape = var_1312, x = q_raw_5)[name = string("op_1313")];
	tensor<int32, [4]> transpose_44_perm_0 = const()[name = string("transpose_44_perm_0"), val = tensor<int32, [4]>([0, 3, 1, 2])];
	tensor<int32, [3]> var_1336 = const()[name = string("op_1336"), val = tensor<int32, [3]>([512, 8, 256])];
	tensor<fp16, [1, 512, 8, 256]> transpose_44 = transpose(perm = transpose_44_perm_0, x = var_1313)[name = string("transpose_94")];
	tensor<fp16, [512, 8, 256]> x_33 = reshape(shape = var_1336, x = transpose_44)[name = string("x_33")];
	int32 var_1342 = const()[name = string("op_1342"), val = int32(-1)];
	fp16 const_17_promoted = const()[name = string("const_17_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [512, 8, 256]> var_1344 = mul(x = x_33, y = const_17_promoted)[name = string("op_1344")];
	bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)];
	tensor<fp16, [512, 8, 512]> input_53 = concat(axis = var_1342, interleave = input_53_interleave_0, values = (x_33, var_1344))[name = string("input_53")];
	tensor<int32, [1]> normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1339_to_fp16 = const()[name = string("op_1339_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [512, 8, 512]> normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_1339_to_fp16, x = input_53)[name = string("normed_53_cast_fp16")];
	tensor<int32, [2]> var_1349_split_sizes_0 = const()[name = string("op_1349_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_1349_axis_0 = const()[name = string("op_1349_axis_0"), val = int32(-1)];
	tensor<fp16, [512, 8, 256]> var_1349_0, tensor<fp16, [512, 8, 256]> var_1349_1 = split(axis = var_1349_axis_0, split_sizes = var_1349_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_1349")];
	tensor<fp16, [512, 8, 256]> q_19 = mul(x = var_1349_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_19")];
	tensor<int32, [4]> var_1356 = const()[name = string("op_1356"), val = tensor<int32, [4]>([1, 512, 8, 256])];
	tensor<fp16, [1, 512, 8, 256]> var_1357 = reshape(shape = var_1356, x = q_19)[name = string("op_1357")];
	tensor<int32, [4]> var_1362 = const()[name = string("op_1362"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<fp16, [1, 8, 512, 256]> q_21 = transpose(perm = var_1362, x = var_1357)[name = string("transpose_93")];
	tensor<fp16, [1, 8, 512, 256]> var_1364_cast_fp16 = mul(x = q_21, y = cos_s)[name = string("op_1364_cast_fp16")];
	tensor<int32, [2]> var_1365_split_sizes_0 = const()[name = string("op_1365_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_1365_axis_0 = const()[name = string("op_1365_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 512, 128]> var_1365_0, tensor<fp16, [1, 8, 512, 128]> var_1365_1 = split(axis = var_1365_axis_0, split_sizes = var_1365_split_sizes_0, x = q_21)[name = string("op_1365")];
	fp16 const_18_promoted = const()[name = string("const_18_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 512, 128]> var_1367 = mul(x = var_1365_1, y = const_18_promoted)[name = string("op_1367")];
	int32 var_1369 = const()[name = string("op_1369"), val = int32(-1)];
	bool var_1370_interleave_0 = const()[name = string("op_1370_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> var_1370 = concat(axis = var_1369, interleave = var_1370_interleave_0, values = (var_1367, var_1365_0))[name = string("op_1370")];
	tensor<fp16, [1, 8, 512, 256]> var_1371_cast_fp16 = mul(x = var_1370, y = sin_s)[name = string("op_1371_cast_fp16")];
	tensor<fp16, [1, 8, 512, 256]> q_23_cast_fp16 = add(x = var_1364_cast_fp16, y = var_1371_cast_fp16)[name = string("q_23_cast_fp16")];
	bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)];
	bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 512]> attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = q_23_cast_fp16, y = transpose_41_cast_fp16)[name = string("attn_weights_9_cast_fp16")];
	tensor<fp16, [1, 8, 512, 512]> x_35_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_35_cast_fp16")];
	tensor<int32, [1]> reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> reduce_max_2 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_35_cast_fp16)[name = string("reduce_max_2")];
	tensor<fp16, [1, 8, 512, 512]> var_1403 = sub(x = x_35_cast_fp16, y = reduce_max_2)[name = string("op_1403")];
	tensor<fp16, [1, 8, 512, 512]> var_1409 = exp(x = var_1403)[name = string("op_1409")];
	tensor<int32, [1]> var_1419_axes_0 = const()[name = string("op_1419_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_1419_keep_dims_0 = const()[name = string("op_1419_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> var_1419 = reduce_sum(axes = var_1419_axes_0, keep_dims = var_1419_keep_dims_0, x = var_1409)[name = string("op_1419")];
	tensor<fp16, [1, 8, 512, 512]> var_1425_cast_fp16 = real_div(x = var_1409, y = var_1419)[name = string("op_1425_cast_fp16")];
	bool attn_output_17_transpose_x_0 = const()[name = string("attn_output_17_transpose_x_0"), val = bool(false)];
	bool attn_output_17_transpose_y_0 = const()[name = string("attn_output_17_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> attn_output_17_cast_fp16 = matmul(transpose_x = attn_output_17_transpose_x_0, transpose_y = attn_output_17_transpose_y_0, x = var_1425_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_17_cast_fp16")];
	tensor<int32, [4]> var_1436 = const()[name = string("op_1436"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_1443 = const()[name = string("op_1443"), val = tensor<int32, [3]>([1, 512, 2048])];
	tensor<fp16, [1, 512, 8, 256]> var_1437_cast_fp16 = transpose(perm = var_1436, x = attn_output_17_cast_fp16)[name = string("transpose_92")];
	tensor<fp16, [1, 512, 2048]> attn_output_19_cast_fp16 = reshape(shape = var_1443, x = var_1437_cast_fp16)[name = string("attn_output_19_cast_fp16")];
	tensor<int32, [3]> var_1448 = const()[name = string("op_1448"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_1464_pad_type_0 = const()[name = string("op_1464_pad_type_0"), val = string("valid")];
	int32 var_1464_groups_0 = const()[name = string("op_1464_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_1464_strides_0 = const()[name = string("op_1464_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_1464_pad_0 = const()[name = string("op_1464_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_1464_dilations_0 = const()[name = string("op_1464_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307857536))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309430464))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 2048, 512]> var_1449_cast_fp16 = transpose(perm = var_1448, x = attn_output_19_cast_fp16)[name = string("transpose_91")];
	tensor<fp16, [1, 1536, 512]> var_1464_cast_fp16 = conv(dilations = var_1464_dilations_0, groups = var_1464_groups_0, pad = var_1464_pad_0, pad_type = var_1464_pad_type_0, strides = var_1464_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_1449_cast_fp16)[name = string("op_1464_cast_fp16")];
	tensor<int32, [3]> var_1468 = const()[name = string("op_1468"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1474 = const()[name = string("op_1474"), val = int32(-1)];
	fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_39_cast_fp16 = transpose(perm = var_1468, x = var_1464_cast_fp16)[name = string("transpose_90")];
	tensor<fp16, [1, 512, 1536]> var_1476_cast_fp16 = mul(x = x_39_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1476_cast_fp16")];
	bool input_57_interleave_0 = const()[name = string("input_57_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_57_cast_fp16 = concat(axis = var_1474, interleave = input_57_interleave_0, values = (x_39_cast_fp16, var_1476_cast_fp16))[name = string("input_57_cast_fp16")];
	tensor<int32, [1]> normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1471_to_fp16 = const()[name = string("op_1471_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_1471_to_fp16, x = input_57_cast_fp16)[name = string("normed_57_cast_fp16")];
	tensor<int32, [2]> var_1481_split_sizes_0 = const()[name = string("op_1481_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1481_axis_0 = const()[name = string("op_1481_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_1481_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_1481_cast_fp16_1 = split(axis = var_1481_axis_0, split_sizes = var_1481_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_1481_cast_fp16")];
	tensor<fp16, [1536]> layers_2_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309432064)))];
	tensor<fp16, [1, 512, 1536]> attn_output_23_cast_fp16 = mul(x = var_1481_cast_fp16_0, y = layers_2_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_23_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> x_41_cast_fp16 = add(x = x_31_cast_fp16, y = attn_output_23_cast_fp16)[name = string("x_41_cast_fp16")];
	int32 var_1490 = const()[name = string("op_1490"), val = int32(-1)];
	fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_1492_cast_fp16 = mul(x = x_41_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1492_cast_fp16")];
	bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_59_cast_fp16 = concat(axis = var_1490, interleave = input_59_interleave_0, values = (x_41_cast_fp16, var_1492_cast_fp16))[name = string("input_59_cast_fp16")];
	tensor<int32, [1]> normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1487_to_fp16 = const()[name = string("op_1487_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_1487_to_fp16, x = input_59_cast_fp16)[name = string("normed_61_cast_fp16")];
	tensor<int32, [2]> var_1497_split_sizes_0 = const()[name = string("op_1497_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1497_axis_0 = const()[name = string("op_1497_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_1497_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_1497_cast_fp16_1 = split(axis = var_1497_axis_0, split_sizes = var_1497_split_sizes_0, x = normed_61_cast_fp16)[name = string("op_1497_cast_fp16")];
	tensor<fp16, [1536]> layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309435200)))];
	tensor<fp16, [1, 512, 1536]> h_15_cast_fp16 = mul(x = var_1497_cast_fp16_0, y = layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_15_cast_fp16")];
	tensor<int32, [3]> var_1508 = const()[name = string("op_1508"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_61_axes_0 = const()[name = string("input_61_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_1509 = transpose(perm = var_1508, x = h_15_cast_fp16)[name = string("transpose_89")];
	tensor<fp16, [1, 1536, 1, 512]> input_61 = expand_dims(axes = input_61_axes_0, x = var_1509)[name = string("input_61")];
	string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_9_dilations_0 = const()[name = string("gate_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_9_groups_0 = const()[name = string("gate_9_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_61)[name = string("gate_9")];
	string up_5_pad_type_0 = const()[name = string("up_5_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_61)[name = string("up_5")];
	string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 512]> gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")];
	tensor<fp16, [1, 12288, 1, 512]> input_63 = mul(x = gate_11, y = up_5)[name = string("input_63")];
	string mlp_out_5_pad_type_0 = const()[name = string("mlp_out_5_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_5_strides_0 = const()[name = string("mlp_out_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_5_pad_0 = const()[name = string("mlp_out_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_5_dilations_0 = const()[name = string("mlp_out_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_5_groups_0 = const()[name = string("mlp_out_5_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 512]> mlp_out_5 = conv(dilations = mlp_out_5_dilations_0, groups = mlp_out_5_groups_0, pad = mlp_out_5_pad_0, pad_type = mlp_out_5_pad_type_0, strides = mlp_out_5_strides_0, weight = layers_2_mlp_down_proj_weight_palettized, x = input_63)[name = string("mlp_out_5")];
	tensor<int32, [1]> var_1549_axes_0 = const()[name = string("op_1549_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_1549 = squeeze(axes = var_1549_axes_0, x = mlp_out_5)[name = string("op_1549")];
	tensor<int32, [3]> var_1553 = const()[name = string("op_1553"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1559 = const()[name = string("op_1559"), val = int32(-1)];
	fp16 const_21_promoted = const()[name = string("const_21_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_43 = transpose(perm = var_1553, x = var_1549)[name = string("transpose_88")];
	tensor<fp16, [1, 512, 1536]> var_1561 = mul(x = x_43, y = const_21_promoted)[name = string("op_1561")];
	bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_65 = concat(axis = var_1559, interleave = input_65_interleave_0, values = (x_43, var_1561))[name = string("input_65")];
	tensor<int32, [1]> normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1556_to_fp16 = const()[name = string("op_1556_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_1556_to_fp16, x = input_65)[name = string("normed_65_cast_fp16")];
	tensor<int32, [2]> var_1566_split_sizes_0 = const()[name = string("op_1566_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1566_axis_0 = const()[name = string("op_1566_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_1566_0, tensor<fp16, [1, 512, 1536]> var_1566_1 = split(axis = var_1566_axis_0, split_sizes = var_1566_split_sizes_0, x = normed_65_cast_fp16)[name = string("op_1566")];
	tensor<fp16, [1, 512, 1536]> hidden_states_mlp_5 = mul(x = var_1566_0, y = layers_2_post_feedforward_layernorm_weight)[name = string("hidden_states_mlp_5")];
	tensor<fp16, [1, 512, 1536]> hidden_states_19_cast_fp16 = add(x = x_41_cast_fp16, y = hidden_states_mlp_5)[name = string("hidden_states_19_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_5_begin_0 = const()[name = string("per_layer_slice_5_begin_0"), val = tensor<int32, [3]>([0, 0, 6912])];
	tensor<int32, [3]> per_layer_slice_5_end_0 = const()[name = string("per_layer_slice_5_end_0"), val = tensor<int32, [3]>([1, 512, 7168])];
	tensor<bool, [3]> per_layer_slice_5_end_mask_0 = const()[name = string("per_layer_slice_5_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 512, 256]> per_layer_slice_5_cast_fp16 = slice_by_index(begin = per_layer_slice_5_begin_0, end = per_layer_slice_5_end_0, end_mask = per_layer_slice_5_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_5_cast_fp16")];
	tensor<int32, [3]> var_1594 = const()[name = string("op_1594"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_1595 = transpose(perm = var_1594, x = hidden_states_19_cast_fp16)[name = string("transpose_87")];
	tensor<fp16, [1, 1536, 1, 512]> input_67 = expand_dims(axes = input_67_axes_0, x = var_1595)[name = string("input_67")];
	string gated_13_pad_type_0 = const()[name = string("gated_13_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_13_strides_0 = const()[name = string("gated_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_13_pad_0 = const()[name = string("gated_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_13_dilations_0 = const()[name = string("gated_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_13_groups_0 = const()[name = string("gated_13_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 512]> gated_13 = conv(dilations = gated_13_dilations_0, groups = gated_13_groups_0, pad = gated_13_pad_0, pad_type = gated_13_pad_type_0, strides = gated_13_strides_0, weight = layers_2_per_layer_input_gate_weight_palettized, x = input_67)[name = string("gated_13")];
	string gated_15_mode_0 = const()[name = string("gated_15_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 512]> gated_15 = gelu(mode = gated_15_mode_0, x = gated_13)[name = string("gated_15")];
	tensor<int32, [3]> var_1614 = const()[name = string("op_1614"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_5_axes_0 = const()[name = string("per_layer_slice_conv_5_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 512]> var_1615_cast_fp16 = transpose(perm = var_1614, x = per_layer_slice_5_cast_fp16)[name = string("transpose_86")];
	tensor<fp16, [1, 256, 1, 512]> per_layer_slice_conv_5_cast_fp16 = expand_dims(axes = per_layer_slice_conv_5_axes_0, x = var_1615_cast_fp16)[name = string("per_layer_slice_conv_5_cast_fp16")];
	tensor<fp16, [1, 256, 1, 512]> input_69_cast_fp16 = mul(x = gated_15, y = per_layer_slice_conv_5_cast_fp16)[name = string("input_69_cast_fp16")];
	string gated_17_pad_type_0 = const()[name = string("gated_17_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_17_strides_0 = const()[name = string("gated_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_17_pad_0 = const()[name = string("gated_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_17_dilations_0 = const()[name = string("gated_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_17_groups_0 = const()[name = string("gated_17_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_2_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309438336))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309635008))))[name = string("layers_2_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 512]> gated_17_cast_fp16 = conv(dilations = gated_17_dilations_0, groups = gated_17_groups_0, pad = gated_17_pad_0, pad_type = gated_17_pad_type_0, strides = gated_17_strides_0, weight = layers_2_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_69_cast_fp16)[name = string("gated_17_cast_fp16")];
	tensor<int32, [1]> var_1631_axes_0 = const()[name = string("op_1631_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_1631_cast_fp16 = squeeze(axes = var_1631_axes_0, x = gated_17_cast_fp16)[name = string("op_1631_cast_fp16")];
	tensor<int32, [3]> var_1635 = const()[name = string("op_1635"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1641 = const()[name = string("op_1641"), val = int32(-1)];
	fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_45_cast_fp16 = transpose(perm = var_1635, x = var_1631_cast_fp16)[name = string("transpose_85")];
	tensor<fp16, [1, 512, 1536]> var_1643_cast_fp16 = mul(x = x_45_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_1643_cast_fp16")];
	bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_71_cast_fp16 = concat(axis = var_1641, interleave = input_71_interleave_0, values = (x_45_cast_fp16, var_1643_cast_fp16))[name = string("input_71_cast_fp16")];
	tensor<int32, [1]> normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1638_to_fp16 = const()[name = string("op_1638_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_1638_to_fp16, x = input_71_cast_fp16)[name = string("normed_69_cast_fp16")];
	tensor<int32, [2]> var_1648_split_sizes_0 = const()[name = string("op_1648_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1648_axis_0 = const()[name = string("op_1648_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_1648_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_1648_cast_fp16_1 = split(axis = var_1648_axis_0, split_sizes = var_1648_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_1648_cast_fp16")];
	tensor<fp16, [1536]> layers_2_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309636608)))];
	tensor<fp16, [1, 512, 1536]> hidden_states_23_cast_fp16 = mul(x = var_1648_cast_fp16_0, y = layers_2_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_23_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> hidden_states_25_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("hidden_states_25_cast_fp16")];
	tensor<fp16, [1]> const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.a4p-1])];
	tensor<fp16, [1, 512, 1536]> x_47_cast_fp16 = mul(x = hidden_states_25_cast_fp16, y = const_23_promoted_to_fp16)[name = string("x_47_cast_fp16")];
	int32 var_1663 = const()[name = string("op_1663"), val = int32(-1)];
	fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_1665_cast_fp16 = mul(x = x_47_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_1665_cast_fp16")];
	bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_73_cast_fp16 = concat(axis = var_1663, interleave = input_73_interleave_0, values = (x_47_cast_fp16, var_1665_cast_fp16))[name = string("input_73_cast_fp16")];
	tensor<int32, [1]> normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1660_to_fp16 = const()[name = string("op_1660_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_1660_to_fp16, x = input_73_cast_fp16)[name = string("normed_73_cast_fp16")];
	tensor<int32, [2]> var_1670_split_sizes_0 = const()[name = string("op_1670_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1670_axis_0 = const()[name = string("op_1670_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_1670_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_1670_cast_fp16_1 = split(axis = var_1670_axis_0, split_sizes = var_1670_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_1670_cast_fp16")];
	tensor<fp16, [1536]> layers_3_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309639744)))];
	tensor<fp16, [1, 512, 1536]> h_19_cast_fp16 = mul(x = var_1670_cast_fp16_0, y = layers_3_input_layernorm_weight_promoted_to_fp16)[name = string("h_19_cast_fp16")];
	tensor<int32, [3]> var_1676 = const()[name = string("op_1676"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_1679_axes_0 = const()[name = string("op_1679_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_1677_cast_fp16 = transpose(perm = var_1676, x = h_19_cast_fp16)[name = string("transpose_84")];
	tensor<fp16, [1, 1536, 1, 512]> var_1679_cast_fp16 = expand_dims(axes = var_1679_axes_0, x = var_1677_cast_fp16)[name = string("op_1679_cast_fp16")];
	string q_raw_7_pad_type_0 = const()[name = string("q_raw_7_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> q_raw_7_strides_0 = const()[name = string("q_raw_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> q_raw_7_pad_0 = const()[name = string("q_raw_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> q_raw_7_dilations_0 = const()[name = string("q_raw_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 q_raw_7_groups_0 = const()[name = string("q_raw_7_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 512]> q_raw_7 = conv(dilations = q_raw_7_dilations_0, groups = q_raw_7_groups_0, pad = q_raw_7_pad_0, pad_type = q_raw_7_pad_type_0, strides = q_raw_7_strides_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = var_1679_cast_fp16)[name = string("q_raw_7")];
	tensor<int32, [4]> var_1700 = const()[name = string("op_1700"), val = tensor<int32, [4]>([1, 8, 256, 512])];
	tensor<fp16, [1, 8, 256, 512]> var_1701 = reshape(shape = var_1700, x = q_raw_7)[name = string("op_1701")];
	tensor<int32, [4]> transpose_46_perm_0 = const()[name = string("transpose_46_perm_0"), val = tensor<int32, [4]>([0, 3, 1, 2])];
	tensor<int32, [3]> var_1724 = const()[name = string("op_1724"), val = tensor<int32, [3]>([512, 8, 256])];
	tensor<fp16, [1, 512, 8, 256]> transpose_46 = transpose(perm = transpose_46_perm_0, x = var_1701)[name = string("transpose_83")];
	tensor<fp16, [512, 8, 256]> x_49 = reshape(shape = var_1724, x = transpose_46)[name = string("x_49")];
	int32 var_1730 = const()[name = string("op_1730"), val = int32(-1)];
	fp16 const_25_promoted = const()[name = string("const_25_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [512, 8, 256]> var_1732 = mul(x = x_49, y = const_25_promoted)[name = string("op_1732")];
	bool input_77_interleave_0 = const()[name = string("input_77_interleave_0"), val = bool(false)];
	tensor<fp16, [512, 8, 512]> input_77 = concat(axis = var_1730, interleave = input_77_interleave_0, values = (x_49, var_1732))[name = string("input_77")];
	tensor<int32, [1]> normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1727_to_fp16 = const()[name = string("op_1727_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [512, 8, 512]> normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_1727_to_fp16, x = input_77)[name = string("normed_77_cast_fp16")];
	tensor<int32, [2]> var_1737_split_sizes_0 = const()[name = string("op_1737_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_1737_axis_0 = const()[name = string("op_1737_axis_0"), val = int32(-1)];
	tensor<fp16, [512, 8, 256]> var_1737_0, tensor<fp16, [512, 8, 256]> var_1737_1 = split(axis = var_1737_axis_0, split_sizes = var_1737_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_1737")];
	tensor<fp16, [512, 8, 256]> q_27 = mul(x = var_1737_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_27")];
	tensor<int32, [4]> var_1744 = const()[name = string("op_1744"), val = tensor<int32, [4]>([1, 512, 8, 256])];
	tensor<fp16, [1, 512, 8, 256]> var_1745 = reshape(shape = var_1744, x = q_27)[name = string("op_1745")];
	tensor<int32, [4]> var_1750 = const()[name = string("op_1750"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<fp16, [1, 8, 512, 256]> q_29 = transpose(perm = var_1750, x = var_1745)[name = string("transpose_82")];
	tensor<fp16, [1, 8, 512, 256]> var_1752_cast_fp16 = mul(x = q_29, y = cos_s)[name = string("op_1752_cast_fp16")];
	tensor<int32, [2]> var_1753_split_sizes_0 = const()[name = string("op_1753_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_1753_axis_0 = const()[name = string("op_1753_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 512, 128]> var_1753_0, tensor<fp16, [1, 8, 512, 128]> var_1753_1 = split(axis = var_1753_axis_0, split_sizes = var_1753_split_sizes_0, x = q_29)[name = string("op_1753")];
	fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 512, 128]> var_1755 = mul(x = var_1753_1, y = const_26_promoted)[name = string("op_1755")];
	int32 var_1757 = const()[name = string("op_1757"), val = int32(-1)];
	bool var_1758_interleave_0 = const()[name = string("op_1758_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> var_1758 = concat(axis = var_1757, interleave = var_1758_interleave_0, values = (var_1755, var_1753_0))[name = string("op_1758")];
	tensor<fp16, [1, 8, 512, 256]> var_1759_cast_fp16 = mul(x = var_1758, y = sin_s)[name = string("op_1759_cast_fp16")];
	tensor<fp16, [1, 8, 512, 256]> q_31_cast_fp16 = add(x = var_1752_cast_fp16, y = var_1759_cast_fp16)[name = string("q_31_cast_fp16")];
	bool attn_weights_13_transpose_x_0 = const()[name = string("attn_weights_13_transpose_x_0"), val = bool(false)];
	bool attn_weights_13_transpose_y_0 = const()[name = string("attn_weights_13_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 512]> attn_weights_13_cast_fp16 = matmul(transpose_x = attn_weights_13_transpose_x_0, transpose_y = attn_weights_13_transpose_y_0, x = q_31_cast_fp16, y = transpose_41_cast_fp16)[name = string("attn_weights_13_cast_fp16")];
	tensor<fp16, [1, 8, 512, 512]> x_51_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_51_cast_fp16")];
	tensor<int32, [1]> reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> reduce_max_3 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_51_cast_fp16)[name = string("reduce_max_3")];
	tensor<fp16, [1, 8, 512, 512]> var_1791 = sub(x = x_51_cast_fp16, y = reduce_max_3)[name = string("op_1791")];
	tensor<fp16, [1, 8, 512, 512]> var_1797 = exp(x = var_1791)[name = string("op_1797")];
	tensor<int32, [1]> var_1807_axes_0 = const()[name = string("op_1807_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_1807_keep_dims_0 = const()[name = string("op_1807_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> var_1807 = reduce_sum(axes = var_1807_axes_0, keep_dims = var_1807_keep_dims_0, x = var_1797)[name = string("op_1807")];
	tensor<fp16, [1, 8, 512, 512]> var_1813_cast_fp16 = real_div(x = var_1797, y = var_1807)[name = string("op_1813_cast_fp16")];
	bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)];
	bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = var_1813_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_25_cast_fp16")];
	tensor<int32, [4]> var_1824 = const()[name = string("op_1824"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_1831 = const()[name = string("op_1831"), val = tensor<int32, [3]>([1, 512, 2048])];
	tensor<fp16, [1, 512, 8, 256]> var_1825_cast_fp16 = transpose(perm = var_1824, x = attn_output_25_cast_fp16)[name = string("transpose_81")];
	tensor<fp16, [1, 512, 2048]> attn_output_27_cast_fp16 = reshape(shape = var_1831, x = var_1825_cast_fp16)[name = string("attn_output_27_cast_fp16")];
	tensor<int32, [3]> var_1836 = const()[name = string("op_1836"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_1852_pad_type_0 = const()[name = string("op_1852_pad_type_0"), val = string("valid")];
	int32 var_1852_groups_0 = const()[name = string("op_1852_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_1852_strides_0 = const()[name = string("op_1852_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_1852_pad_0 = const()[name = string("op_1852_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_1852_dilations_0 = const()[name = string("op_1852_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309642880))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311215808))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 2048, 512]> var_1837_cast_fp16 = transpose(perm = var_1836, x = attn_output_27_cast_fp16)[name = string("transpose_80")];
	tensor<fp16, [1, 1536, 512]> var_1852_cast_fp16 = conv(dilations = var_1852_dilations_0, groups = var_1852_groups_0, pad = var_1852_pad_0, pad_type = var_1852_pad_type_0, strides = var_1852_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_1837_cast_fp16)[name = string("op_1852_cast_fp16")];
	tensor<int32, [3]> var_1856 = const()[name = string("op_1856"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1862 = const()[name = string("op_1862"), val = int32(-1)];
	fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_55_cast_fp16 = transpose(perm = var_1856, x = var_1852_cast_fp16)[name = string("transpose_79")];
	tensor<fp16, [1, 512, 1536]> var_1864_cast_fp16 = mul(x = x_55_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_1864_cast_fp16")];
	bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_81_cast_fp16 = concat(axis = var_1862, interleave = input_81_interleave_0, values = (x_55_cast_fp16, var_1864_cast_fp16))[name = string("input_81_cast_fp16")];
	tensor<int32, [1]> normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1859_to_fp16 = const()[name = string("op_1859_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_1859_to_fp16, x = input_81_cast_fp16)[name = string("normed_81_cast_fp16")];
	tensor<int32, [2]> var_1869_split_sizes_0 = const()[name = string("op_1869_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1869_axis_0 = const()[name = string("op_1869_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_1869_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_1869_cast_fp16_1 = split(axis = var_1869_axis_0, split_sizes = var_1869_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_1869_cast_fp16")];
	tensor<fp16, [1536]> layers_3_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311217408)))];
	tensor<fp16, [1, 512, 1536]> attn_output_31_cast_fp16 = mul(x = var_1869_cast_fp16_0, y = layers_3_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_31_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> x_57_cast_fp16 = add(x = x_47_cast_fp16, y = attn_output_31_cast_fp16)[name = string("x_57_cast_fp16")];
	int32 var_1878 = const()[name = string("op_1878"), val = int32(-1)];
	fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_1880_cast_fp16 = mul(x = x_57_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_1880_cast_fp16")];
	bool input_83_interleave_0 = const()[name = string("input_83_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_83_cast_fp16 = concat(axis = var_1878, interleave = input_83_interleave_0, values = (x_57_cast_fp16, var_1880_cast_fp16))[name = string("input_83_cast_fp16")];
	tensor<int32, [1]> normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1875_to_fp16 = const()[name = string("op_1875_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_1875_to_fp16, x = input_83_cast_fp16)[name = string("normed_85_cast_fp16")];
	tensor<int32, [2]> var_1885_split_sizes_0 = const()[name = string("op_1885_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1885_axis_0 = const()[name = string("op_1885_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_1885_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_1885_cast_fp16_1 = split(axis = var_1885_axis_0, split_sizes = var_1885_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_1885_cast_fp16")];
	tensor<fp16, [1536]> layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311220544)))];
	tensor<fp16, [1, 512, 1536]> h_21_cast_fp16 = mul(x = var_1885_cast_fp16_0, y = layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_21_cast_fp16")];
	tensor<int32, [3]> var_1896 = const()[name = string("op_1896"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_1897 = transpose(perm = var_1896, x = h_21_cast_fp16)[name = string("transpose_78")];
	tensor<fp16, [1, 1536, 1, 512]> input_85 = expand_dims(axes = input_85_axes_0, x = var_1897)[name = string("input_85")];
	string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_13_pad_0 = const()[name = string("gate_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_13_groups_0 = const()[name = string("gate_13_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_85)[name = string("gate_13")];
	string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_7_pad_0 = const()[name = string("up_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_85)[name = string("up_7")];
	string gate_15_mode_0 = const()[name = string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 512]> gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")];
	tensor<fp16, [1, 12288, 1, 512]> input_87 = mul(x = gate_15, y = up_7)[name = string("input_87")];
	string mlp_out_7_pad_type_0 = const()[name = string("mlp_out_7_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_7_strides_0 = const()[name = string("mlp_out_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_7_pad_0 = const()[name = string("mlp_out_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_7_dilations_0 = const()[name = string("mlp_out_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_7_groups_0 = const()[name = string("mlp_out_7_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 512]> mlp_out_7 = conv(dilations = mlp_out_7_dilations_0, groups = mlp_out_7_groups_0, pad = mlp_out_7_pad_0, pad_type = mlp_out_7_pad_type_0, strides = mlp_out_7_strides_0, weight = layers_3_mlp_down_proj_weight_palettized, x = input_87)[name = string("mlp_out_7")];
	tensor<int32, [1]> var_1937_axes_0 = const()[name = string("op_1937_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_1937 = squeeze(axes = var_1937_axes_0, x = mlp_out_7)[name = string("op_1937")];
	tensor<int32, [3]> var_1941 = const()[name = string("op_1941"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1947 = const()[name = string("op_1947"), val = int32(-1)];
	fp16 const_29_promoted = const()[name = string("const_29_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_59 = transpose(perm = var_1941, x = var_1937)[name = string("transpose_77")];
	tensor<fp16, [1, 512, 1536]> var_1949 = mul(x = x_59, y = const_29_promoted)[name = string("op_1949")];
	bool input_89_interleave_0 = const()[name = string("input_89_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_89 = concat(axis = var_1947, interleave = input_89_interleave_0, values = (x_59, var_1949))[name = string("input_89")];
	tensor<int32, [1]> normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1944_to_fp16 = const()[name = string("op_1944_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_1944_to_fp16, x = input_89)[name = string("normed_89_cast_fp16")];
	tensor<int32, [2]> var_1954_split_sizes_0 = const()[name = string("op_1954_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1954_axis_0 = const()[name = string("op_1954_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_1954_0, tensor<fp16, [1, 512, 1536]> var_1954_1 = split(axis = var_1954_axis_0, split_sizes = var_1954_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_1954")];
	tensor<fp16, [1, 512, 1536]> hidden_states_mlp_7 = mul(x = var_1954_0, y = layers_3_post_feedforward_layernorm_weight)[name = string("hidden_states_mlp_7")];
	tensor<fp16, [1, 512, 1536]> hidden_states_27_cast_fp16 = add(x = x_57_cast_fp16, y = hidden_states_mlp_7)[name = string("hidden_states_27_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_7_begin_0 = const()[name = string("per_layer_slice_7_begin_0"), val = tensor<int32, [3]>([0, 0, 7168])];
	tensor<int32, [3]> per_layer_slice_7_end_0 = const()[name = string("per_layer_slice_7_end_0"), val = tensor<int32, [3]>([1, 512, 7424])];
	tensor<bool, [3]> per_layer_slice_7_end_mask_0 = const()[name = string("per_layer_slice_7_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 512, 256]> per_layer_slice_7_cast_fp16 = slice_by_index(begin = per_layer_slice_7_begin_0, end = per_layer_slice_7_end_0, end_mask = per_layer_slice_7_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_7_cast_fp16")];
	tensor<int32, [3]> var_1982 = const()[name = string("op_1982"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_91_axes_0 = const()[name = string("input_91_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_1983 = transpose(perm = var_1982, x = hidden_states_27_cast_fp16)[name = string("transpose_76")];
	tensor<fp16, [1, 1536, 1, 512]> input_91 = expand_dims(axes = input_91_axes_0, x = var_1983)[name = string("input_91")];
	string gated_19_pad_type_0 = const()[name = string("gated_19_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_19_strides_0 = const()[name = string("gated_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_19_pad_0 = const()[name = string("gated_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_19_dilations_0 = const()[name = string("gated_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_19_groups_0 = const()[name = string("gated_19_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 512]> gated_19 = conv(dilations = gated_19_dilations_0, groups = gated_19_groups_0, pad = gated_19_pad_0, pad_type = gated_19_pad_type_0, strides = gated_19_strides_0, weight = layers_3_per_layer_input_gate_weight_palettized, x = input_91)[name = string("gated_19")];
	string gated_21_mode_0 = const()[name = string("gated_21_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 512]> gated_21 = gelu(mode = gated_21_mode_0, x = gated_19)[name = string("gated_21")];
	tensor<int32, [3]> var_2002 = const()[name = string("op_2002"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_7_axes_0 = const()[name = string("per_layer_slice_conv_7_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 512]> var_2003_cast_fp16 = transpose(perm = var_2002, x = per_layer_slice_7_cast_fp16)[name = string("transpose_75")];
	tensor<fp16, [1, 256, 1, 512]> per_layer_slice_conv_7_cast_fp16 = expand_dims(axes = per_layer_slice_conv_7_axes_0, x = var_2003_cast_fp16)[name = string("per_layer_slice_conv_7_cast_fp16")];
	tensor<fp16, [1, 256, 1, 512]> input_93_cast_fp16 = mul(x = gated_21, y = per_layer_slice_conv_7_cast_fp16)[name = string("input_93_cast_fp16")];
	string gated_23_pad_type_0 = const()[name = string("gated_23_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_23_strides_0 = const()[name = string("gated_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_23_pad_0 = const()[name = string("gated_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_23_dilations_0 = const()[name = string("gated_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_23_groups_0 = const()[name = string("gated_23_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_3_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311223680))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311420352))))[name = string("layers_3_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 512]> gated_23_cast_fp16 = conv(dilations = gated_23_dilations_0, groups = gated_23_groups_0, pad = gated_23_pad_0, pad_type = gated_23_pad_type_0, strides = gated_23_strides_0, weight = layers_3_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_93_cast_fp16)[name = string("gated_23_cast_fp16")];
	tensor<int32, [1]> var_2019_axes_0 = const()[name = string("op_2019_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_2019_cast_fp16 = squeeze(axes = var_2019_axes_0, x = gated_23_cast_fp16)[name = string("op_2019_cast_fp16")];
	tensor<int32, [3]> var_2023 = const()[name = string("op_2023"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2029 = const()[name = string("op_2029"), val = int32(-1)];
	fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_61_cast_fp16 = transpose(perm = var_2023, x = var_2019_cast_fp16)[name = string("transpose_74")];
	tensor<fp16, [1, 512, 1536]> var_2031_cast_fp16 = mul(x = x_61_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2031_cast_fp16")];
	bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_95_cast_fp16 = concat(axis = var_2029, interleave = input_95_interleave_0, values = (x_61_cast_fp16, var_2031_cast_fp16))[name = string("input_95_cast_fp16")];
	tensor<int32, [1]> normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2026_to_fp16 = const()[name = string("op_2026_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_2026_to_fp16, x = input_95_cast_fp16)[name = string("normed_93_cast_fp16")];
	tensor<int32, [2]> var_2036_split_sizes_0 = const()[name = string("op_2036_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2036_axis_0 = const()[name = string("op_2036_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_2036_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_2036_cast_fp16_1 = split(axis = var_2036_axis_0, split_sizes = var_2036_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_2036_cast_fp16")];
	tensor<fp16, [1536]> layers_3_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311421952)))];
	tensor<fp16, [1, 512, 1536]> hidden_states_31_cast_fp16 = mul(x = var_2036_cast_fp16_0, y = layers_3_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_31_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> hidden_states_33_cast_fp16 = add(x = hidden_states_27_cast_fp16, y = hidden_states_31_cast_fp16)[name = string("hidden_states_33_cast_fp16")];
	tensor<fp16, [1]> const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.a4p-1])];
	tensor<fp16, [1, 512, 1536]> x_63_cast_fp16 = mul(x = hidden_states_33_cast_fp16, y = const_31_promoted_to_fp16)[name = string("x_63_cast_fp16")];
	int32 var_2051 = const()[name = string("op_2051"), val = int32(-1)];
	fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_2053_cast_fp16 = mul(x = x_63_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_2053_cast_fp16")];
	bool input_97_interleave_0 = const()[name = string("input_97_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_97_cast_fp16 = concat(axis = var_2051, interleave = input_97_interleave_0, values = (x_63_cast_fp16, var_2053_cast_fp16))[name = string("input_97_cast_fp16")];
	tensor<int32, [1]> normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2048_to_fp16 = const()[name = string("op_2048_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_2048_to_fp16, x = input_97_cast_fp16)[name = string("normed_97_cast_fp16")];
	tensor<int32, [2]> var_2058_split_sizes_0 = const()[name = string("op_2058_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2058_axis_0 = const()[name = string("op_2058_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_2058_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_2058_cast_fp16_1 = split(axis = var_2058_axis_0, split_sizes = var_2058_split_sizes_0, x = normed_97_cast_fp16)[name = string("op_2058_cast_fp16")];
	tensor<fp16, [1536]> layers_4_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311425088)))];
	tensor<fp16, [1, 512, 1536]> h_25_cast_fp16 = mul(x = var_2058_cast_fp16_0, y = layers_4_input_layernorm_weight_promoted_to_fp16)[name = string("h_25_cast_fp16")];
	tensor<int32, [3]> var_2064 = const()[name = string("op_2064"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2067_axes_0 = const()[name = string("op_2067_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_2065_cast_fp16 = transpose(perm = var_2064, x = h_25_cast_fp16)[name = string("transpose_73")];
	tensor<fp16, [1, 1536, 1, 512]> var_2067_cast_fp16 = expand_dims(axes = var_2067_axes_0, x = var_2065_cast_fp16)[name = string("op_2067_cast_fp16")];
	string q_raw_9_pad_type_0 = const()[name = string("q_raw_9_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> q_raw_9_strides_0 = const()[name = string("q_raw_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> q_raw_9_pad_0 = const()[name = string("q_raw_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> q_raw_9_dilations_0 = const()[name = string("q_raw_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 q_raw_9_groups_0 = const()[name = string("q_raw_9_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4096, 1, 512]> q_raw_9 = conv(dilations = q_raw_9_dilations_0, groups = q_raw_9_groups_0, pad = q_raw_9_pad_0, pad_type = q_raw_9_pad_type_0, strides = q_raw_9_strides_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = var_2067_cast_fp16)[name = string("q_raw_9")];
	tensor<int32, [4]> var_2088 = const()[name = string("op_2088"), val = tensor<int32, [4]>([1, 8, 512, 512])];
	tensor<fp16, [1, 8, 512, 512]> var_2089 = reshape(shape = var_2088, x = q_raw_9)[name = string("op_2089")];
	tensor<int32, [4]> transpose_48_perm_0 = const()[name = string("transpose_48_perm_0"), val = tensor<int32, [4]>([0, 3, 1, 2])];
	tensor<int32, [3]> var_2112 = const()[name = string("op_2112"), val = tensor<int32, [3]>([512, 8, 512])];
	tensor<fp16, [1, 512, 8, 512]> transpose_48 = transpose(perm = transpose_48_perm_0, x = var_2089)[name = string("transpose_72")];
	tensor<fp16, [512, 8, 512]> x_65 = reshape(shape = var_2112, x = transpose_48)[name = string("x_65")];
	int32 var_2118 = const()[name = string("op_2118"), val = int32(-1)];
	fp16 const_33_promoted = const()[name = string("const_33_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [512, 8, 512]> var_2120 = mul(x = x_65, y = const_33_promoted)[name = string("op_2120")];
	bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)];
	tensor<fp16, [512, 8, 1024]> input_101 = concat(axis = var_2118, interleave = input_101_interleave_0, values = (x_65, var_2120))[name = string("input_101")];
	tensor<int32, [1]> normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2115_to_fp16 = const()[name = string("op_2115_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [512, 8, 1024]> normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_2115_to_fp16, x = input_101)[name = string("normed_101_cast_fp16")];
	tensor<int32, [2]> var_2125_split_sizes_0 = const()[name = string("op_2125_split_sizes_0"), val = tensor<int32, [2]>([512, 512])];
	int32 var_2125_axis_0 = const()[name = string("op_2125_axis_0"), val = int32(-1)];
	tensor<fp16, [512, 8, 512]> var_2125_0, tensor<fp16, [512, 8, 512]> var_2125_1 = split(axis = var_2125_axis_0, split_sizes = var_2125_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_2125")];
	tensor<fp16, [512, 8, 512]> q_35 = mul(x = var_2125_0, y = layers_4_self_attn_q_norm_weight)[name = string("q_35")];
	tensor<int32, [4]> var_2132 = const()[name = string("op_2132"), val = tensor<int32, [4]>([1, 512, 8, 512])];
	tensor<fp16, [1, 512, 8, 512]> var_2133 = reshape(shape = var_2132, x = q_35)[name = string("op_2133")];
	tensor<int32, [4]> var_2138 = const()[name = string("op_2138"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<fp16, [1, 8, 512, 512]> q_37 = transpose(perm = var_2138, x = var_2133)[name = string("transpose_71")];
	tensor<fp16, [1, 8, 512, 512]> var_2140_cast_fp16 = mul(x = q_37, y = cos_f)[name = string("op_2140_cast_fp16")];
	tensor<int32, [2]> var_2141_split_sizes_0 = const()[name = string("op_2141_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_2141_axis_0 = const()[name = string("op_2141_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 512, 256]> var_2141_0, tensor<fp16, [1, 8, 512, 256]> var_2141_1 = split(axis = var_2141_axis_0, split_sizes = var_2141_split_sizes_0, x = q_37)[name = string("op_2141")];
	fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 512, 256]> var_2143 = mul(x = var_2141_1, y = const_34_promoted)[name = string("op_2143")];
	int32 var_2145 = const()[name = string("op_2145"), val = int32(-1)];
	bool var_2146_interleave_0 = const()[name = string("op_2146_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 512]> var_2146 = concat(axis = var_2145, interleave = var_2146_interleave_0, values = (var_2143, var_2141_0))[name = string("op_2146")];
	tensor<fp16, [1, 8, 512, 512]> var_2147_cast_fp16 = mul(x = var_2146, y = sin_f)[name = string("op_2147_cast_fp16")];
	tensor<fp16, [1, 8, 512, 512]> q_39_cast_fp16 = add(x = var_2140_cast_fp16, y = var_2147_cast_fp16)[name = string("q_39_cast_fp16")];
	tensor<int32, [4]> transpose_16_perm_0 = const()[name = string("transpose_16_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_8_reps_0 = const()[name = string("tile_8_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])];
	tensor<fp16, [1, 1, 512, 512]> transpose_16_cast_fp16 = transpose(perm = transpose_16_perm_0, x = kv14_k)[name = string("transpose_70")];
	tensor<fp16, [8, 1, 512, 512]> tile_8_cast_fp16 = tile(reps = tile_8_reps_0, x = transpose_16_cast_fp16)[name = string("tile_8_cast_fp16")];
	tensor<int32, [5]> concat_16 = const()[name = string("concat_16"), val = tensor<int32, [5]>([8, 1, 1, 512, 512])];
	tensor<fp16, [8, 1, 1, 512, 512]> reshape_16_cast_fp16 = reshape(shape = concat_16, x = tile_8_cast_fp16)[name = string("reshape_16_cast_fp16")];
	tensor<int32, [5]> transpose_17_perm_0 = const()[name = string("transpose_17_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [4]>([-1, 1, 512, 512])];
	tensor<fp16, [1, 8, 1, 512, 512]> transpose_17_cast_fp16 = transpose(perm = transpose_17_perm_0, x = reshape_16_cast_fp16)[name = string("transpose_69")];
	tensor<fp16, [8, 1, 512, 512]> reshape_17_cast_fp16 = reshape(shape = concat_17, x = transpose_17_cast_fp16)[name = string("reshape_17_cast_fp16")];
	tensor<int32, [4]> transpose_49_perm_0 = const()[name = string("transpose_49_perm_0"), val = tensor<int32, [4]>([1, 0, -1, -2])];
	tensor<int32, [4]> transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_9_reps_0 = const()[name = string("tile_9_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])];
	tensor<fp16, [1, 1, 512, 512]> transpose_18_cast_fp16 = transpose(perm = transpose_18_perm_0, x = kv14_v)[name = string("transpose_68")];
	tensor<fp16, [8, 1, 512, 512]> tile_9_cast_fp16 = tile(reps = tile_9_reps_0, x = transpose_18_cast_fp16)[name = string("tile_9_cast_fp16")];
	tensor<int32, [5]> concat_18 = const()[name = string("concat_18"), val = tensor<int32, [5]>([8, 1, 1, 512, 512])];
	tensor<fp16, [8, 1, 1, 512, 512]> reshape_18_cast_fp16 = reshape(shape = concat_18, x = tile_9_cast_fp16)[name = string("reshape_18_cast_fp16")];
	tensor<int32, [5]> transpose_19_perm_0 = const()[name = string("transpose_19_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_19 = const()[name = string("concat_19"), val = tensor<int32, [4]>([-1, 1, 512, 512])];
	tensor<fp16, [1, 8, 1, 512, 512]> transpose_19_cast_fp16 = transpose(perm = transpose_19_perm_0, x = reshape_18_cast_fp16)[name = string("transpose_67")];
	tensor<fp16, [8, 1, 512, 512]> reshape_19_cast_fp16 = reshape(shape = concat_19, x = transpose_19_cast_fp16)[name = string("reshape_19_cast_fp16")];
	tensor<int32, [4]> V_expanded_9_perm_0 = const()[name = string("V_expanded_9_perm_0"), val = tensor<int32, [4]>([1, 0, -2, -1])];
	bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)];
	bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 512]> transpose_49_cast_fp16 = transpose(perm = transpose_49_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_66")];
	tensor<fp16, [1, 8, 512, 512]> attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = q_39_cast_fp16, y = transpose_49_cast_fp16)[name = string("attn_weights_17_cast_fp16")];
	tensor<fp16, [1, 8, 512, 512]> x_67_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask)[name = string("x_67_cast_fp16")];
	tensor<int32, [1]> reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> reduce_max_4 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_67_cast_fp16)[name = string("reduce_max_4")];
	tensor<fp16, [1, 8, 512, 512]> var_2179 = sub(x = x_67_cast_fp16, y = reduce_max_4)[name = string("op_2179")];
	tensor<fp16, [1, 8, 512, 512]> var_2185 = exp(x = var_2179)[name = string("op_2185")];
	tensor<int32, [1]> var_2195_axes_0 = const()[name = string("op_2195_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_2195_keep_dims_0 = const()[name = string("op_2195_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> var_2195 = reduce_sum(axes = var_2195_axes_0, keep_dims = var_2195_keep_dims_0, x = var_2185)[name = string("op_2195")];
	tensor<fp16, [1, 8, 512, 512]> var_2201_cast_fp16 = real_div(x = var_2185, y = var_2195)[name = string("op_2201_cast_fp16")];
	bool attn_output_33_transpose_x_0 = const()[name = string("attn_output_33_transpose_x_0"), val = bool(false)];
	bool attn_output_33_transpose_y_0 = const()[name = string("attn_output_33_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 512]> V_expanded_9_cast_fp16 = transpose(perm = V_expanded_9_perm_0, x = reshape_19_cast_fp16)[name = string("transpose_65")];
	tensor<fp16, [1, 8, 512, 512]> attn_output_33_cast_fp16 = matmul(transpose_x = attn_output_33_transpose_x_0, transpose_y = attn_output_33_transpose_y_0, x = var_2201_cast_fp16, y = V_expanded_9_cast_fp16)[name = string("attn_output_33_cast_fp16")];
	tensor<int32, [4]> var_2212 = const()[name = string("op_2212"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_2219 = const()[name = string("op_2219"), val = tensor<int32, [3]>([1, 512, 4096])];
	tensor<fp16, [1, 512, 8, 512]> var_2213_cast_fp16 = transpose(perm = var_2212, x = attn_output_33_cast_fp16)[name = string("transpose_64")];
	tensor<fp16, [1, 512, 4096]> attn_output_35_cast_fp16 = reshape(shape = var_2219, x = var_2213_cast_fp16)[name = string("attn_output_35_cast_fp16")];
	tensor<int32, [3]> var_2224 = const()[name = string("op_2224"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_2240_pad_type_0 = const()[name = string("op_2240_pad_type_0"), val = string("valid")];
	int32 var_2240_groups_0 = const()[name = string("op_2240_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_2240_strides_0 = const()[name = string("op_2240_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_2240_pad_0 = const()[name = string("op_2240_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_2240_dilations_0 = const()[name = string("op_2240_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 4096, 1]> squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 4096, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311428224))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314574016))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 4096, 512]> var_2225_cast_fp16 = transpose(perm = var_2224, x = attn_output_35_cast_fp16)[name = string("transpose_63")];
	tensor<fp16, [1, 1536, 512]> var_2240_cast_fp16 = conv(dilations = var_2240_dilations_0, groups = var_2240_groups_0, pad = var_2240_pad_0, pad_type = var_2240_pad_type_0, strides = var_2240_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_2225_cast_fp16)[name = string("op_2240_cast_fp16")];
	tensor<int32, [3]> var_2244 = const()[name = string("op_2244"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2250 = const()[name = string("op_2250"), val = int32(-1)];
	fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_71_cast_fp16 = transpose(perm = var_2244, x = var_2240_cast_fp16)[name = string("transpose_62")];
	tensor<fp16, [1, 512, 1536]> var_2252_cast_fp16 = mul(x = x_71_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_2252_cast_fp16")];
	bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_105_cast_fp16 = concat(axis = var_2250, interleave = input_105_interleave_0, values = (x_71_cast_fp16, var_2252_cast_fp16))[name = string("input_105_cast_fp16")];
	tensor<int32, [1]> normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2247_to_fp16 = const()[name = string("op_2247_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_2247_to_fp16, x = input_105_cast_fp16)[name = string("normed_105_cast_fp16")];
	tensor<int32, [2]> var_2257_split_sizes_0 = const()[name = string("op_2257_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2257_axis_0 = const()[name = string("op_2257_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_2257_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_2257_cast_fp16_1 = split(axis = var_2257_axis_0, split_sizes = var_2257_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_2257_cast_fp16")];
	tensor<fp16, [1536]> layers_4_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314575616)))];
	tensor<fp16, [1, 512, 1536]> attn_output_39_cast_fp16 = mul(x = var_2257_cast_fp16_0, y = layers_4_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_39_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> x_73_cast_fp16 = add(x = x_63_cast_fp16, y = attn_output_39_cast_fp16)[name = string("x_73_cast_fp16")];
	int32 var_2266 = const()[name = string("op_2266"), val = int32(-1)];
	fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_2268_cast_fp16 = mul(x = x_73_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_2268_cast_fp16")];
	bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_107_cast_fp16 = concat(axis = var_2266, interleave = input_107_interleave_0, values = (x_73_cast_fp16, var_2268_cast_fp16))[name = string("input_107_cast_fp16")];
	tensor<int32, [1]> normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2263_to_fp16 = const()[name = string("op_2263_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_2263_to_fp16, x = input_107_cast_fp16)[name = string("normed_109_cast_fp16")];
	tensor<int32, [2]> var_2273_split_sizes_0 = const()[name = string("op_2273_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2273_axis_0 = const()[name = string("op_2273_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_2273_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_2273_cast_fp16_1 = split(axis = var_2273_axis_0, split_sizes = var_2273_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_2273_cast_fp16")];
	tensor<fp16, [1536]> layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314578752)))];
	tensor<fp16, [1, 512, 1536]> h_27_cast_fp16 = mul(x = var_2273_cast_fp16_0, y = layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_27_cast_fp16")];
	tensor<int32, [3]> var_2284 = const()[name = string("op_2284"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_2285 = transpose(perm = var_2284, x = h_27_cast_fp16)[name = string("transpose_61")];
	tensor<fp16, [1, 1536, 1, 512]> input_109 = expand_dims(axes = input_109_axes_0, x = var_2285)[name = string("input_109")];
	string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_17_groups_0 = const()[name = string("gate_17_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_109)[name = string("gate_17")];
	string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_9_strides_0 = const()[name = string("up_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_9_pad_0 = const()[name = string("up_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> up_9 = conv(dilations = up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_109)[name = string("up_9")];
	string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 512]> gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = string("gate_19")];
	tensor<fp16, [1, 12288, 1, 512]> input_111 = mul(x = gate_19, y = up_9)[name = string("input_111")];
	string mlp_out_9_pad_type_0 = const()[name = string("mlp_out_9_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_9_strides_0 = const()[name = string("mlp_out_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_9_pad_0 = const()[name = string("mlp_out_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_9_dilations_0 = const()[name = string("mlp_out_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_9_groups_0 = const()[name = string("mlp_out_9_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 512]> mlp_out_9 = conv(dilations = mlp_out_9_dilations_0, groups = mlp_out_9_groups_0, pad = mlp_out_9_pad_0, pad_type = mlp_out_9_pad_type_0, strides = mlp_out_9_strides_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_111)[name = string("mlp_out_9")];
	tensor<int32, [1]> var_2325_axes_0 = const()[name = string("op_2325_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_2325 = squeeze(axes = var_2325_axes_0, x = mlp_out_9)[name = string("op_2325")];
	tensor<int32, [3]> var_2329 = const()[name = string("op_2329"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2335 = const()[name = string("op_2335"), val = int32(-1)];
	fp16 const_37_promoted = const()[name = string("const_37_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_75 = transpose(perm = var_2329, x = var_2325)[name = string("transpose_60")];
	tensor<fp16, [1, 512, 1536]> var_2337 = mul(x = x_75, y = const_37_promoted)[name = string("op_2337")];
	bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_113 = concat(axis = var_2335, interleave = input_113_interleave_0, values = (x_75, var_2337))[name = string("input_113")];
	tensor<int32, [1]> normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2332_to_fp16 = const()[name = string("op_2332_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_2332_to_fp16, x = input_113)[name = string("normed_113_cast_fp16")];
	tensor<int32, [2]> var_2342_split_sizes_0 = const()[name = string("op_2342_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2342_axis_0 = const()[name = string("op_2342_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_2342_0, tensor<fp16, [1, 512, 1536]> var_2342_1 = split(axis = var_2342_axis_0, split_sizes = var_2342_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_2342")];
	tensor<fp16, [1, 512, 1536]> hidden_states_mlp_9 = mul(x = var_2342_0, y = layers_4_post_feedforward_layernorm_weight)[name = string("hidden_states_mlp_9")];
	tensor<fp16, [1, 512, 1536]> hidden_states_35_cast_fp16 = add(x = x_73_cast_fp16, y = hidden_states_mlp_9)[name = string("hidden_states_35_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_9_begin_0 = const()[name = string("per_layer_slice_9_begin_0"), val = tensor<int32, [3]>([0, 0, 7424])];
	tensor<int32, [3]> per_layer_slice_9_end_0 = const()[name = string("per_layer_slice_9_end_0"), val = tensor<int32, [3]>([1, 512, 7680])];
	tensor<bool, [3]> per_layer_slice_9_end_mask_0 = const()[name = string("per_layer_slice_9_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 512, 256]> per_layer_slice_9_cast_fp16 = slice_by_index(begin = per_layer_slice_9_begin_0, end = per_layer_slice_9_end_0, end_mask = per_layer_slice_9_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_9_cast_fp16")];
	tensor<int32, [3]> var_2370 = const()[name = string("op_2370"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_2371 = transpose(perm = var_2370, x = hidden_states_35_cast_fp16)[name = string("transpose_59")];
	tensor<fp16, [1, 1536, 1, 512]> input_115 = expand_dims(axes = input_115_axes_0, x = var_2371)[name = string("input_115")];
	string gated_25_pad_type_0 = const()[name = string("gated_25_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_25_strides_0 = const()[name = string("gated_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_25_pad_0 = const()[name = string("gated_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_25_dilations_0 = const()[name = string("gated_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_25_groups_0 = const()[name = string("gated_25_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 512]> gated_25 = conv(dilations = gated_25_dilations_0, groups = gated_25_groups_0, pad = gated_25_pad_0, pad_type = gated_25_pad_type_0, strides = gated_25_strides_0, weight = layers_4_per_layer_input_gate_weight_palettized, x = input_115)[name = string("gated_25")];
	string gated_27_mode_0 = const()[name = string("gated_27_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 512]> gated_27 = gelu(mode = gated_27_mode_0, x = gated_25)[name = string("gated_27")];
	tensor<int32, [3]> var_2390 = const()[name = string("op_2390"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_9_axes_0 = const()[name = string("per_layer_slice_conv_9_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 512]> var_2391_cast_fp16 = transpose(perm = var_2390, x = per_layer_slice_9_cast_fp16)[name = string("transpose_58")];
	tensor<fp16, [1, 256, 1, 512]> per_layer_slice_conv_9_cast_fp16 = expand_dims(axes = per_layer_slice_conv_9_axes_0, x = var_2391_cast_fp16)[name = string("per_layer_slice_conv_9_cast_fp16")];
	tensor<fp16, [1, 256, 1, 512]> input_117_cast_fp16 = mul(x = gated_27, y = per_layer_slice_conv_9_cast_fp16)[name = string("input_117_cast_fp16")];
	string gated_29_pad_type_0 = const()[name = string("gated_29_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_29_strides_0 = const()[name = string("gated_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_29_pad_0 = const()[name = string("gated_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_29_dilations_0 = const()[name = string("gated_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_29_groups_0 = const()[name = string("gated_29_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_4_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314581888))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314778560))))[name = string("layers_4_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 512]> gated_29_cast_fp16 = conv(dilations = gated_29_dilations_0, groups = gated_29_groups_0, pad = gated_29_pad_0, pad_type = gated_29_pad_type_0, strides = gated_29_strides_0, weight = layers_4_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("gated_29_cast_fp16")];
	tensor<int32, [1]> var_2407_axes_0 = const()[name = string("op_2407_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_2407_cast_fp16 = squeeze(axes = var_2407_axes_0, x = gated_29_cast_fp16)[name = string("op_2407_cast_fp16")];
	tensor<int32, [3]> var_2411 = const()[name = string("op_2411"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2417 = const()[name = string("op_2417"), val = int32(-1)];
	fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_77_cast_fp16 = transpose(perm = var_2411, x = var_2407_cast_fp16)[name = string("transpose_57")];
	tensor<fp16, [1, 512, 1536]> var_2419_cast_fp16 = mul(x = x_77_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2419_cast_fp16")];
	bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_119_cast_fp16 = concat(axis = var_2417, interleave = input_119_interleave_0, values = (x_77_cast_fp16, var_2419_cast_fp16))[name = string("input_119_cast_fp16")];
	tensor<int32, [1]> normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2414_to_fp16 = const()[name = string("op_2414_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_2414_to_fp16, x = input_119_cast_fp16)[name = string("normed_117_cast_fp16")];
	tensor<int32, [2]> var_2424_split_sizes_0 = const()[name = string("op_2424_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2424_axis_0 = const()[name = string("op_2424_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_2424_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_2424_cast_fp16_1 = split(axis = var_2424_axis_0, split_sizes = var_2424_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_2424_cast_fp16")];
	tensor<fp16, [1536]> layers_4_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314780160)))];
	tensor<fp16, [1, 512, 1536]> hidden_states_39_cast_fp16 = mul(x = var_2424_cast_fp16_0, y = layers_4_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_39_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> hidden_states_41_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("hidden_states_41_cast_fp16")];
	tensor<fp16, [1]> const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.ap-1])];
	tensor<fp16, [1, 512, 1536]> x_79_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_39_promoted_to_fp16)[name = string("x_79_cast_fp16")];
	int32 var_2439 = const()[name = string("op_2439"), val = int32(-1)];
	fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_2441_cast_fp16 = mul(x = x_79_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_2441_cast_fp16")];
	bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_121_cast_fp16 = concat(axis = var_2439, interleave = input_121_interleave_0, values = (x_79_cast_fp16, var_2441_cast_fp16))[name = string("input_121_cast_fp16")];
	tensor<int32, [1]> normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2436_to_fp16 = const()[name = string("op_2436_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_2436_to_fp16, x = input_121_cast_fp16)[name = string("normed_121_cast_fp16")];
	tensor<int32, [2]> var_2446_split_sizes_0 = const()[name = string("op_2446_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2446_axis_0 = const()[name = string("op_2446_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_2446_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_2446_cast_fp16_1 = split(axis = var_2446_axis_0, split_sizes = var_2446_split_sizes_0, x = normed_121_cast_fp16)[name = string("op_2446_cast_fp16")];
	tensor<fp16, [1536]> layers_5_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314783296)))];
	tensor<fp16, [1, 512, 1536]> h_31_cast_fp16 = mul(x = var_2446_cast_fp16_0, y = layers_5_input_layernorm_weight_promoted_to_fp16)[name = string("h_31_cast_fp16")];
	tensor<int32, [3]> var_2452 = const()[name = string("op_2452"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2455_axes_0 = const()[name = string("op_2455_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_2453_cast_fp16 = transpose(perm = var_2452, x = h_31_cast_fp16)[name = string("transpose_56")];
	tensor<fp16, [1, 1536, 1, 512]> var_2455_cast_fp16 = expand_dims(axes = var_2455_axes_0, x = var_2453_cast_fp16)[name = string("op_2455_cast_fp16")];
	string q_raw_11_pad_type_0 = const()[name = string("q_raw_11_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> q_raw_11_strides_0 = const()[name = string("q_raw_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> q_raw_11_pad_0 = const()[name = string("q_raw_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> q_raw_11_dilations_0 = const()[name = string("q_raw_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 q_raw_11_groups_0 = const()[name = string("q_raw_11_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 512]> q_raw_11 = conv(dilations = q_raw_11_dilations_0, groups = q_raw_11_groups_0, pad = q_raw_11_pad_0, pad_type = q_raw_11_pad_type_0, strides = q_raw_11_strides_0, weight = layers_5_self_attn_q_proj_weight_palettized, x = var_2455_cast_fp16)[name = string("q_raw_11")];
	tensor<int32, [4]> var_2476 = const()[name = string("op_2476"), val = tensor<int32, [4]>([1, 8, 256, 512])];
	tensor<fp16, [1, 8, 256, 512]> var_2477 = reshape(shape = var_2476, x = q_raw_11)[name = string("op_2477")];
	tensor<int32, [4]> transpose_50_perm_0 = const()[name = string("transpose_50_perm_0"), val = tensor<int32, [4]>([0, 3, 1, 2])];
	tensor<int32, [3]> var_2500 = const()[name = string("op_2500"), val = tensor<int32, [3]>([512, 8, 256])];
	tensor<fp16, [1, 512, 8, 256]> transpose_50 = transpose(perm = transpose_50_perm_0, x = var_2477)[name = string("transpose_55")];
	tensor<fp16, [512, 8, 256]> x_81 = reshape(shape = var_2500, x = transpose_50)[name = string("x_81")];
	int32 var_2506 = const()[name = string("op_2506"), val = int32(-1)];
	fp16 const_41_promoted = const()[name = string("const_41_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [512, 8, 256]> var_2508 = mul(x = x_81, y = const_41_promoted)[name = string("op_2508")];
	bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)];
	tensor<fp16, [512, 8, 512]> input_125 = concat(axis = var_2506, interleave = input_125_interleave_0, values = (x_81, var_2508))[name = string("input_125")];
	tensor<int32, [1]> normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2503_to_fp16 = const()[name = string("op_2503_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [512, 8, 512]> normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_2503_to_fp16, x = input_125)[name = string("normed_125_cast_fp16")];
	tensor<int32, [2]> var_2513_split_sizes_0 = const()[name = string("op_2513_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_2513_axis_0 = const()[name = string("op_2513_axis_0"), val = int32(-1)];
	tensor<fp16, [512, 8, 256]> var_2513_0, tensor<fp16, [512, 8, 256]> var_2513_1 = split(axis = var_2513_axis_0, split_sizes = var_2513_split_sizes_0, x = normed_125_cast_fp16)[name = string("op_2513")];
	tensor<fp16, [512, 8, 256]> q_43 = mul(x = var_2513_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_43")];
	tensor<int32, [4]> var_2520 = const()[name = string("op_2520"), val = tensor<int32, [4]>([1, 512, 8, 256])];
	tensor<fp16, [1, 512, 8, 256]> var_2521 = reshape(shape = var_2520, x = q_43)[name = string("op_2521")];
	tensor<int32, [4]> var_2526 = const()[name = string("op_2526"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<fp16, [1, 8, 512, 256]> q_45 = transpose(perm = var_2526, x = var_2521)[name = string("transpose_54")];
	tensor<fp16, [1, 8, 512, 256]> var_2528_cast_fp16 = mul(x = q_45, y = cos_s)[name = string("op_2528_cast_fp16")];
	tensor<int32, [2]> var_2529_split_sizes_0 = const()[name = string("op_2529_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_2529_axis_0 = const()[name = string("op_2529_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 512, 128]> var_2529_0, tensor<fp16, [1, 8, 512, 128]> var_2529_1 = split(axis = var_2529_axis_0, split_sizes = var_2529_split_sizes_0, x = q_45)[name = string("op_2529")];
	fp16 const_42_promoted = const()[name = string("const_42_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 512, 128]> var_2531 = mul(x = var_2529_1, y = const_42_promoted)[name = string("op_2531")];
	int32 var_2533 = const()[name = string("op_2533"), val = int32(-1)];
	bool var_2534_interleave_0 = const()[name = string("op_2534_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> var_2534 = concat(axis = var_2533, interleave = var_2534_interleave_0, values = (var_2531, var_2529_0))[name = string("op_2534")];
	tensor<fp16, [1, 8, 512, 256]> var_2535_cast_fp16 = mul(x = var_2534, y = sin_s)[name = string("op_2535_cast_fp16")];
	tensor<fp16, [1, 8, 512, 256]> q_47_cast_fp16 = add(x = var_2528_cast_fp16, y = var_2535_cast_fp16)[name = string("q_47_cast_fp16")];
	bool attn_weights_21_transpose_x_0 = const()[name = string("attn_weights_21_transpose_x_0"), val = bool(false)];
	bool attn_weights_21_transpose_y_0 = const()[name = string("attn_weights_21_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 512]> attn_weights_21_cast_fp16 = matmul(transpose_x = attn_weights_21_transpose_x_0, transpose_y = attn_weights_21_transpose_y_0, x = q_47_cast_fp16, y = transpose_41_cast_fp16)[name = string("attn_weights_21_cast_fp16")];
	tensor<fp16, [1, 8, 512, 512]> x_83_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("x_83_cast_fp16")];
	tensor<int32, [1]> reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> reduce_max_5 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_83_cast_fp16)[name = string("reduce_max_5")];
	tensor<fp16, [1, 8, 512, 512]> var_2567 = sub(x = x_83_cast_fp16, y = reduce_max_5)[name = string("op_2567")];
	tensor<fp16, [1, 8, 512, 512]> var_2573 = exp(x = var_2567)[name = string("op_2573")];
	tensor<int32, [1]> var_2583_axes_0 = const()[name = string("op_2583_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_2583_keep_dims_0 = const()[name = string("op_2583_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> var_2583 = reduce_sum(axes = var_2583_axes_0, keep_dims = var_2583_keep_dims_0, x = var_2573)[name = string("op_2583")];
	tensor<fp16, [1, 8, 512, 512]> var_2589_cast_fp16 = real_div(x = var_2573, y = var_2583)[name = string("op_2589_cast_fp16")];
	bool attn_output_41_transpose_x_0 = const()[name = string("attn_output_41_transpose_x_0"), val = bool(false)];
	bool attn_output_41_transpose_y_0 = const()[name = string("attn_output_41_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> attn_output_41_cast_fp16 = matmul(transpose_x = attn_output_41_transpose_x_0, transpose_y = attn_output_41_transpose_y_0, x = var_2589_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_41_cast_fp16")];
	tensor<int32, [4]> var_2600 = const()[name = string("op_2600"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_2607 = const()[name = string("op_2607"), val = tensor<int32, [3]>([1, 512, 2048])];
	tensor<fp16, [1, 512, 8, 256]> var_2601_cast_fp16 = transpose(perm = var_2600, x = attn_output_41_cast_fp16)[name = string("transpose_53")];
	tensor<fp16, [1, 512, 2048]> attn_output_43_cast_fp16 = reshape(shape = var_2607, x = var_2601_cast_fp16)[name = string("attn_output_43_cast_fp16")];
	tensor<int32, [3]> var_2612 = const()[name = string("op_2612"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_2628_pad_type_0 = const()[name = string("op_2628_pad_type_0"), val = string("valid")];
	int32 var_2628_groups_0 = const()[name = string("op_2628_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_2628_strides_0 = const()[name = string("op_2628_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_2628_pad_0 = const()[name = string("op_2628_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_2628_dilations_0 = const()[name = string("op_2628_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314786432))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316359360))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 2048, 512]> var_2613_cast_fp16 = transpose(perm = var_2612, x = attn_output_43_cast_fp16)[name = string("transpose_52")];
	tensor<fp16, [1, 1536, 512]> var_2628_cast_fp16 = conv(dilations = var_2628_dilations_0, groups = var_2628_groups_0, pad = var_2628_pad_0, pad_type = var_2628_pad_type_0, strides = var_2628_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_2613_cast_fp16)[name = string("op_2628_cast_fp16")];
	tensor<int32, [3]> var_2632 = const()[name = string("op_2632"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2638 = const()[name = string("op_2638"), val = int32(-1)];
	fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_87_cast_fp16 = transpose(perm = var_2632, x = var_2628_cast_fp16)[name = string("transpose_51")];
	tensor<fp16, [1, 512, 1536]> var_2640_cast_fp16 = mul(x = x_87_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_2640_cast_fp16")];
	bool input_129_interleave_0 = const()[name = string("input_129_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_129_cast_fp16 = concat(axis = var_2638, interleave = input_129_interleave_0, values = (x_87_cast_fp16, var_2640_cast_fp16))[name = string("input_129_cast_fp16")];
	tensor<int32, [1]> normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2635_to_fp16 = const()[name = string("op_2635_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_2635_to_fp16, x = input_129_cast_fp16)[name = string("normed_129_cast_fp16")];
	tensor<int32, [2]> var_2645_split_sizes_0 = const()[name = string("op_2645_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2645_axis_0 = const()[name = string("op_2645_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_2645_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_2645_cast_fp16_1 = split(axis = var_2645_axis_0, split_sizes = var_2645_split_sizes_0, x = normed_129_cast_fp16)[name = string("op_2645_cast_fp16")];
	tensor<fp16, [1536]> layers_5_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316360960)))];
	tensor<fp16, [1, 512, 1536]> attn_output_47_cast_fp16 = mul(x = var_2645_cast_fp16_0, y = layers_5_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_47_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> x_89_cast_fp16 = add(x = x_79_cast_fp16, y = attn_output_47_cast_fp16)[name = string("x_89_cast_fp16")];
	int32 var_2654 = const()[name = string("op_2654"), val = int32(-1)];
	fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_2656_cast_fp16 = mul(x = x_89_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_2656_cast_fp16")];
	bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_131_cast_fp16 = concat(axis = var_2654, interleave = input_131_interleave_0, values = (x_89_cast_fp16, var_2656_cast_fp16))[name = string("input_131_cast_fp16")];
	tensor<int32, [1]> normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2651_to_fp16 = const()[name = string("op_2651_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_2651_to_fp16, x = input_131_cast_fp16)[name = string("normed_133_cast_fp16")];
	tensor<int32, [2]> var_2661_split_sizes_0 = const()[name = string("op_2661_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2661_axis_0 = const()[name = string("op_2661_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_2661_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_2661_cast_fp16_1 = split(axis = var_2661_axis_0, split_sizes = var_2661_split_sizes_0, x = normed_133_cast_fp16)[name = string("op_2661_cast_fp16")];
	tensor<fp16, [1536]> layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316364096)))];
	tensor<fp16, [1, 512, 1536]> h_33_cast_fp16 = mul(x = var_2661_cast_fp16_0, y = layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_33_cast_fp16")];
	tensor<int32, [3]> var_2672 = const()[name = string("op_2672"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_133_axes_0 = const()[name = string("input_133_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_2673 = transpose(perm = var_2672, x = h_33_cast_fp16)[name = string("transpose_50")];
	tensor<fp16, [1, 1536, 1, 512]> input_133 = expand_dims(axes = input_133_axes_0, x = var_2673)[name = string("input_133")];
	string gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, strides = gate_21_strides_0, weight = layers_5_mlp_gate_proj_weight_palettized, x = input_133)[name = string("gate_21")];
	string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_11_groups_0 = const()[name = string("up_11_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = layers_5_mlp_up_proj_weight_palettized, x = input_133)[name = string("up_11")];
	string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 512]> gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")];
	tensor<fp16, [1, 12288, 1, 512]> input_135 = mul(x = gate_23, y = up_11)[name = string("input_135")];
	string mlp_out_11_pad_type_0 = const()[name = string("mlp_out_11_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_11_strides_0 = const()[name = string("mlp_out_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_11_pad_0 = const()[name = string("mlp_out_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_11_dilations_0 = const()[name = string("mlp_out_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_11_groups_0 = const()[name = string("mlp_out_11_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 512]> mlp_out_11 = conv(dilations = mlp_out_11_dilations_0, groups = mlp_out_11_groups_0, pad = mlp_out_11_pad_0, pad_type = mlp_out_11_pad_type_0, strides = mlp_out_11_strides_0, weight = layers_5_mlp_down_proj_weight_palettized, x = input_135)[name = string("mlp_out_11")];
	tensor<int32, [1]> var_2713_axes_0 = const()[name = string("op_2713_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_2713 = squeeze(axes = var_2713_axes_0, x = mlp_out_11)[name = string("op_2713")];
	tensor<int32, [3]> var_2717 = const()[name = string("op_2717"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2723 = const()[name = string("op_2723"), val = int32(-1)];
	fp16 const_45_promoted = const()[name = string("const_45_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_91 = transpose(perm = var_2717, x = var_2713)[name = string("transpose_49")];
	tensor<fp16, [1, 512, 1536]> var_2725 = mul(x = x_91, y = const_45_promoted)[name = string("op_2725")];
	bool input_137_interleave_0 = const()[name = string("input_137_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_137 = concat(axis = var_2723, interleave = input_137_interleave_0, values = (x_91, var_2725))[name = string("input_137")];
	tensor<int32, [1]> normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2720_to_fp16 = const()[name = string("op_2720_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_2720_to_fp16, x = input_137)[name = string("normed_137_cast_fp16")];
	tensor<int32, [2]> var_2730_split_sizes_0 = const()[name = string("op_2730_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2730_axis_0 = const()[name = string("op_2730_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_2730_0, tensor<fp16, [1, 512, 1536]> var_2730_1 = split(axis = var_2730_axis_0, split_sizes = var_2730_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_2730")];
	tensor<fp16, [1, 512, 1536]> hidden_states_mlp_11 = mul(x = var_2730_0, y = layers_5_post_feedforward_layernorm_weight)[name = string("hidden_states_mlp_11")];
	tensor<fp16, [1, 512, 1536]> hidden_states_43_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_mlp_11)[name = string("hidden_states_43_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_11_begin_0 = const()[name = string("per_layer_slice_11_begin_0"), val = tensor<int32, [3]>([0, 0, 7680])];
	tensor<int32, [3]> per_layer_slice_11_end_0 = const()[name = string("per_layer_slice_11_end_0"), val = tensor<int32, [3]>([1, 512, 7936])];
	tensor<bool, [3]> per_layer_slice_11_end_mask_0 = const()[name = string("per_layer_slice_11_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 512, 256]> per_layer_slice_11_cast_fp16 = slice_by_index(begin = per_layer_slice_11_begin_0, end = per_layer_slice_11_end_0, end_mask = per_layer_slice_11_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_11_cast_fp16")];
	tensor<int32, [3]> var_2758 = const()[name = string("op_2758"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_2759 = transpose(perm = var_2758, x = hidden_states_43_cast_fp16)[name = string("transpose_48")];
	tensor<fp16, [1, 1536, 1, 512]> input_139 = expand_dims(axes = input_139_axes_0, x = var_2759)[name = string("input_139")];
	string gated_31_pad_type_0 = const()[name = string("gated_31_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_31_strides_0 = const()[name = string("gated_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_31_pad_0 = const()[name = string("gated_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_31_dilations_0 = const()[name = string("gated_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_31_groups_0 = const()[name = string("gated_31_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 512]> gated_31 = conv(dilations = gated_31_dilations_0, groups = gated_31_groups_0, pad = gated_31_pad_0, pad_type = gated_31_pad_type_0, strides = gated_31_strides_0, weight = layers_5_per_layer_input_gate_weight_palettized, x = input_139)[name = string("gated_31")];
	string gated_33_mode_0 = const()[name = string("gated_33_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 512]> gated_33 = gelu(mode = gated_33_mode_0, x = gated_31)[name = string("gated_33")];
	tensor<int32, [3]> var_2778 = const()[name = string("op_2778"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_11_axes_0 = const()[name = string("per_layer_slice_conv_11_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 512]> var_2779_cast_fp16 = transpose(perm = var_2778, x = per_layer_slice_11_cast_fp16)[name = string("transpose_47")];
	tensor<fp16, [1, 256, 1, 512]> per_layer_slice_conv_11_cast_fp16 = expand_dims(axes = per_layer_slice_conv_11_axes_0, x = var_2779_cast_fp16)[name = string("per_layer_slice_conv_11_cast_fp16")];
	tensor<fp16, [1, 256, 1, 512]> input_141_cast_fp16 = mul(x = gated_33, y = per_layer_slice_conv_11_cast_fp16)[name = string("input_141_cast_fp16")];
	string gated_35_pad_type_0 = const()[name = string("gated_35_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_35_strides_0 = const()[name = string("gated_35_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_35_pad_0 = const()[name = string("gated_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_35_dilations_0 = const()[name = string("gated_35_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_35_groups_0 = const()[name = string("gated_35_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_5_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316367232))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316563904))))[name = string("layers_5_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 512]> gated_35_cast_fp16 = conv(dilations = gated_35_dilations_0, groups = gated_35_groups_0, pad = gated_35_pad_0, pad_type = gated_35_pad_type_0, strides = gated_35_strides_0, weight = layers_5_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_141_cast_fp16)[name = string("gated_35_cast_fp16")];
	tensor<int32, [1]> var_2795_axes_0 = const()[name = string("op_2795_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_2795_cast_fp16 = squeeze(axes = var_2795_axes_0, x = gated_35_cast_fp16)[name = string("op_2795_cast_fp16")];
	tensor<int32, [3]> var_2799 = const()[name = string("op_2799"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2805 = const()[name = string("op_2805"), val = int32(-1)];
	fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_93_cast_fp16 = transpose(perm = var_2799, x = var_2795_cast_fp16)[name = string("transpose_46")];
	tensor<fp16, [1, 512, 1536]> var_2807_cast_fp16 = mul(x = x_93_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_2807_cast_fp16")];
	bool input_143_interleave_0 = const()[name = string("input_143_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_143_cast_fp16 = concat(axis = var_2805, interleave = input_143_interleave_0, values = (x_93_cast_fp16, var_2807_cast_fp16))[name = string("input_143_cast_fp16")];
	tensor<int32, [1]> normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2802_to_fp16 = const()[name = string("op_2802_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_2802_to_fp16, x = input_143_cast_fp16)[name = string("normed_141_cast_fp16")];
	tensor<int32, [2]> var_2812_split_sizes_0 = const()[name = string("op_2812_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2812_axis_0 = const()[name = string("op_2812_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_2812_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_2812_cast_fp16_1 = split(axis = var_2812_axis_0, split_sizes = var_2812_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_2812_cast_fp16")];
	tensor<fp16, [1536]> layers_5_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316565504)))];
	tensor<fp16, [1, 512, 1536]> hidden_states_47_cast_fp16 = mul(x = var_2812_cast_fp16_0, y = layers_5_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_47_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> hidden_states_49_cast_fp16 = add(x = hidden_states_43_cast_fp16, y = hidden_states_47_cast_fp16)[name = string("hidden_states_49_cast_fp16")];
	tensor<fp16, [1]> const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.bep-1])];
	tensor<fp16, [1, 512, 1536]> x_95_cast_fp16 = mul(x = hidden_states_49_cast_fp16, y = const_47_promoted_to_fp16)[name = string("x_95_cast_fp16")];
	int32 var_2827 = const()[name = string("op_2827"), val = int32(-1)];
	fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_2829_cast_fp16 = mul(x = x_95_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_2829_cast_fp16")];
	bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_145_cast_fp16 = concat(axis = var_2827, interleave = input_145_interleave_0, values = (x_95_cast_fp16, var_2829_cast_fp16))[name = string("input_145_cast_fp16")];
	tensor<int32, [1]> normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2824_to_fp16 = const()[name = string("op_2824_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_2824_to_fp16, x = input_145_cast_fp16)[name = string("normed_145_cast_fp16")];
	tensor<int32, [2]> var_2834_split_sizes_0 = const()[name = string("op_2834_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2834_axis_0 = const()[name = string("op_2834_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_2834_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_2834_cast_fp16_1 = split(axis = var_2834_axis_0, split_sizes = var_2834_split_sizes_0, x = normed_145_cast_fp16)[name = string("op_2834_cast_fp16")];
	tensor<fp16, [1536]> layers_6_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316568640)))];
	tensor<fp16, [1, 512, 1536]> h_37_cast_fp16 = mul(x = var_2834_cast_fp16_0, y = layers_6_input_layernorm_weight_promoted_to_fp16)[name = string("h_37_cast_fp16")];
	tensor<int32, [3]> var_2840 = const()[name = string("op_2840"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2843_axes_0 = const()[name = string("op_2843_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_2841_cast_fp16 = transpose(perm = var_2840, x = h_37_cast_fp16)[name = string("transpose_45")];
	tensor<fp16, [1, 1536, 1, 512]> var_2843_cast_fp16 = expand_dims(axes = var_2843_axes_0, x = var_2841_cast_fp16)[name = string("op_2843_cast_fp16")];
	string q_raw_13_pad_type_0 = const()[name = string("q_raw_13_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> q_raw_13_strides_0 = const()[name = string("q_raw_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> q_raw_13_pad_0 = const()[name = string("q_raw_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> q_raw_13_dilations_0 = const()[name = string("q_raw_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 q_raw_13_groups_0 = const()[name = string("q_raw_13_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 512]> q_raw_13 = conv(dilations = q_raw_13_dilations_0, groups = q_raw_13_groups_0, pad = q_raw_13_pad_0, pad_type = q_raw_13_pad_type_0, strides = q_raw_13_strides_0, weight = layers_6_self_attn_q_proj_weight_palettized, x = var_2843_cast_fp16)[name = string("q_raw_13")];
	tensor<int32, [4]> var_2864 = const()[name = string("op_2864"), val = tensor<int32, [4]>([1, 8, 256, 512])];
	tensor<fp16, [1, 8, 256, 512]> var_2865 = reshape(shape = var_2864, x = q_raw_13)[name = string("op_2865")];
	tensor<int32, [4]> transpose_52_perm_0 = const()[name = string("transpose_52_perm_0"), val = tensor<int32, [4]>([0, 3, 1, 2])];
	tensor<int32, [3]> var_2888 = const()[name = string("op_2888"), val = tensor<int32, [3]>([512, 8, 256])];
	tensor<fp16, [1, 512, 8, 256]> transpose_52 = transpose(perm = transpose_52_perm_0, x = var_2865)[name = string("transpose_44")];
	tensor<fp16, [512, 8, 256]> x_97 = reshape(shape = var_2888, x = transpose_52)[name = string("x_97")];
	int32 var_2894 = const()[name = string("op_2894"), val = int32(-1)];
	fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [512, 8, 256]> var_2896 = mul(x = x_97, y = const_49_promoted)[name = string("op_2896")];
	bool input_149_interleave_0 = const()[name = string("input_149_interleave_0"), val = bool(false)];
	tensor<fp16, [512, 8, 512]> input_149 = concat(axis = var_2894, interleave = input_149_interleave_0, values = (x_97, var_2896))[name = string("input_149")];
	tensor<int32, [1]> normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2891_to_fp16 = const()[name = string("op_2891_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [512, 8, 512]> normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_2891_to_fp16, x = input_149)[name = string("normed_149_cast_fp16")];
	tensor<int32, [2]> var_2901_split_sizes_0 = const()[name = string("op_2901_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_2901_axis_0 = const()[name = string("op_2901_axis_0"), val = int32(-1)];
	tensor<fp16, [512, 8, 256]> var_2901_0, tensor<fp16, [512, 8, 256]> var_2901_1 = split(axis = var_2901_axis_0, split_sizes = var_2901_split_sizes_0, x = normed_149_cast_fp16)[name = string("op_2901")];
	tensor<fp16, [512, 8, 256]> q_51 = mul(x = var_2901_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_51")];
	tensor<int32, [4]> var_2908 = const()[name = string("op_2908"), val = tensor<int32, [4]>([1, 512, 8, 256])];
	tensor<fp16, [1, 512, 8, 256]> var_2909 = reshape(shape = var_2908, x = q_51)[name = string("op_2909")];
	tensor<int32, [4]> var_2914 = const()[name = string("op_2914"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<fp16, [1, 8, 512, 256]> q_53 = transpose(perm = var_2914, x = var_2909)[name = string("transpose_43")];
	tensor<fp16, [1, 8, 512, 256]> var_2916_cast_fp16 = mul(x = q_53, y = cos_s)[name = string("op_2916_cast_fp16")];
	tensor<int32, [2]> var_2917_split_sizes_0 = const()[name = string("op_2917_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_2917_axis_0 = const()[name = string("op_2917_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 512, 128]> var_2917_0, tensor<fp16, [1, 8, 512, 128]> var_2917_1 = split(axis = var_2917_axis_0, split_sizes = var_2917_split_sizes_0, x = q_53)[name = string("op_2917")];
	fp16 const_50_promoted = const()[name = string("const_50_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 512, 128]> var_2919 = mul(x = var_2917_1, y = const_50_promoted)[name = string("op_2919")];
	int32 var_2921 = const()[name = string("op_2921"), val = int32(-1)];
	bool var_2922_interleave_0 = const()[name = string("op_2922_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> var_2922 = concat(axis = var_2921, interleave = var_2922_interleave_0, values = (var_2919, var_2917_0))[name = string("op_2922")];
	tensor<fp16, [1, 8, 512, 256]> var_2923_cast_fp16 = mul(x = var_2922, y = sin_s)[name = string("op_2923_cast_fp16")];
	tensor<fp16, [1, 8, 512, 256]> q_55_cast_fp16 = add(x = var_2916_cast_fp16, y = var_2923_cast_fp16)[name = string("q_55_cast_fp16")];
	bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)];
	bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 512]> attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = q_55_cast_fp16, y = transpose_41_cast_fp16)[name = string("attn_weights_25_cast_fp16")];
	tensor<fp16, [1, 8, 512, 512]> x_99_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("x_99_cast_fp16")];
	tensor<int32, [1]> reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> reduce_max_6 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_99_cast_fp16)[name = string("reduce_max_6")];
	tensor<fp16, [1, 8, 512, 512]> var_2955 = sub(x = x_99_cast_fp16, y = reduce_max_6)[name = string("op_2955")];
	tensor<fp16, [1, 8, 512, 512]> var_2961 = exp(x = var_2955)[name = string("op_2961")];
	tensor<int32, [1]> var_2971_axes_0 = const()[name = string("op_2971_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_2971_keep_dims_0 = const()[name = string("op_2971_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> var_2971 = reduce_sum(axes = var_2971_axes_0, keep_dims = var_2971_keep_dims_0, x = var_2961)[name = string("op_2971")];
	tensor<fp16, [1, 8, 512, 512]> var_2977_cast_fp16 = real_div(x = var_2961, y = var_2971)[name = string("op_2977_cast_fp16")];
	bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)];
	bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = var_2977_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_49_cast_fp16")];
	tensor<int32, [4]> var_2988 = const()[name = string("op_2988"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_2995 = const()[name = string("op_2995"), val = tensor<int32, [3]>([1, 512, 2048])];
	tensor<fp16, [1, 512, 8, 256]> var_2989_cast_fp16 = transpose(perm = var_2988, x = attn_output_49_cast_fp16)[name = string("transpose_42")];
	tensor<fp16, [1, 512, 2048]> attn_output_51_cast_fp16 = reshape(shape = var_2995, x = var_2989_cast_fp16)[name = string("attn_output_51_cast_fp16")];
	tensor<int32, [3]> var_3000 = const()[name = string("op_3000"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_3016_pad_type_0 = const()[name = string("op_3016_pad_type_0"), val = string("valid")];
	int32 var_3016_groups_0 = const()[name = string("op_3016_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_3016_strides_0 = const()[name = string("op_3016_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_3016_pad_0 = const()[name = string("op_3016_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_3016_dilations_0 = const()[name = string("op_3016_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316571776))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318144704))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 2048, 512]> var_3001_cast_fp16 = transpose(perm = var_3000, x = attn_output_51_cast_fp16)[name = string("transpose_41")];
	tensor<fp16, [1, 1536, 512]> var_3016_cast_fp16 = conv(dilations = var_3016_dilations_0, groups = var_3016_groups_0, pad = var_3016_pad_0, pad_type = var_3016_pad_type_0, strides = var_3016_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_3001_cast_fp16)[name = string("op_3016_cast_fp16")];
	tensor<int32, [3]> var_3020 = const()[name = string("op_3020"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3026 = const()[name = string("op_3026"), val = int32(-1)];
	fp16 const_51_promoted_to_fp16 = const()[name = string("const_51_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_103_cast_fp16 = transpose(perm = var_3020, x = var_3016_cast_fp16)[name = string("transpose_40")];
	tensor<fp16, [1, 512, 1536]> var_3028_cast_fp16 = mul(x = x_103_cast_fp16, y = const_51_promoted_to_fp16)[name = string("op_3028_cast_fp16")];
	bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_153_cast_fp16 = concat(axis = var_3026, interleave = input_153_interleave_0, values = (x_103_cast_fp16, var_3028_cast_fp16))[name = string("input_153_cast_fp16")];
	tensor<int32, [1]> normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3023_to_fp16 = const()[name = string("op_3023_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_3023_to_fp16, x = input_153_cast_fp16)[name = string("normed_153_cast_fp16")];
	tensor<int32, [2]> var_3033_split_sizes_0 = const()[name = string("op_3033_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3033_axis_0 = const()[name = string("op_3033_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_3033_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_3033_cast_fp16_1 = split(axis = var_3033_axis_0, split_sizes = var_3033_split_sizes_0, x = normed_153_cast_fp16)[name = string("op_3033_cast_fp16")];
	tensor<fp16, [1536]> layers_6_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318146304)))];
	tensor<fp16, [1, 512, 1536]> attn_output_55_cast_fp16 = mul(x = var_3033_cast_fp16_0, y = layers_6_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_55_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> x_105_cast_fp16 = add(x = x_95_cast_fp16, y = attn_output_55_cast_fp16)[name = string("x_105_cast_fp16")];
	int32 var_3042 = const()[name = string("op_3042"), val = int32(-1)];
	fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_3044_cast_fp16 = mul(x = x_105_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_3044_cast_fp16")];
	bool input_155_interleave_0 = const()[name = string("input_155_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_155_cast_fp16 = concat(axis = var_3042, interleave = input_155_interleave_0, values = (x_105_cast_fp16, var_3044_cast_fp16))[name = string("input_155_cast_fp16")];
	tensor<int32, [1]> normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3039_to_fp16 = const()[name = string("op_3039_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_3039_to_fp16, x = input_155_cast_fp16)[name = string("normed_157_cast_fp16")];
	tensor<int32, [2]> var_3049_split_sizes_0 = const()[name = string("op_3049_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3049_axis_0 = const()[name = string("op_3049_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_3049_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_3049_cast_fp16_1 = split(axis = var_3049_axis_0, split_sizes = var_3049_split_sizes_0, x = normed_157_cast_fp16)[name = string("op_3049_cast_fp16")];
	tensor<fp16, [1536]> layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318149440)))];
	tensor<fp16, [1, 512, 1536]> h_39_cast_fp16 = mul(x = var_3049_cast_fp16_0, y = layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_39_cast_fp16")];
	tensor<int32, [3]> var_3060 = const()[name = string("op_3060"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_157_axes_0 = const()[name = string("input_157_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_3061 = transpose(perm = var_3060, x = h_39_cast_fp16)[name = string("transpose_39")];
	tensor<fp16, [1, 1536, 1, 512]> input_157 = expand_dims(axes = input_157_axes_0, x = var_3061)[name = string("input_157")];
	string gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_25_groups_0 = const()[name = string("gate_25_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, strides = gate_25_strides_0, weight = layers_6_mlp_gate_proj_weight_palettized, x = input_157)[name = string("gate_25")];
	string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_13_groups_0 = const()[name = string("up_13_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = layers_6_mlp_up_proj_weight_palettized, x = input_157)[name = string("up_13")];
	string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 512]> gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")];
	tensor<fp16, [1, 12288, 1, 512]> input_159 = mul(x = gate_27, y = up_13)[name = string("input_159")];
	string mlp_out_13_pad_type_0 = const()[name = string("mlp_out_13_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_13_strides_0 = const()[name = string("mlp_out_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_13_pad_0 = const()[name = string("mlp_out_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_13_dilations_0 = const()[name = string("mlp_out_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_13_groups_0 = const()[name = string("mlp_out_13_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 512]> mlp_out_13 = conv(dilations = mlp_out_13_dilations_0, groups = mlp_out_13_groups_0, pad = mlp_out_13_pad_0, pad_type = mlp_out_13_pad_type_0, strides = mlp_out_13_strides_0, weight = layers_6_mlp_down_proj_weight_palettized, x = input_159)[name = string("mlp_out_13")];
	tensor<int32, [1]> var_3101_axes_0 = const()[name = string("op_3101_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_3101 = squeeze(axes = var_3101_axes_0, x = mlp_out_13)[name = string("op_3101")];
	tensor<int32, [3]> var_3105 = const()[name = string("op_3105"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3111 = const()[name = string("op_3111"), val = int32(-1)];
	fp16 const_53_promoted = const()[name = string("const_53_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_107 = transpose(perm = var_3105, x = var_3101)[name = string("transpose_38")];
	tensor<fp16, [1, 512, 1536]> var_3113 = mul(x = x_107, y = const_53_promoted)[name = string("op_3113")];
	bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_161 = concat(axis = var_3111, interleave = input_161_interleave_0, values = (x_107, var_3113))[name = string("input_161")];
	tensor<int32, [1]> normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3108_to_fp16 = const()[name = string("op_3108_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_3108_to_fp16, x = input_161)[name = string("normed_161_cast_fp16")];
	tensor<int32, [2]> var_3118_split_sizes_0 = const()[name = string("op_3118_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3118_axis_0 = const()[name = string("op_3118_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_3118_0, tensor<fp16, [1, 512, 1536]> var_3118_1 = split(axis = var_3118_axis_0, split_sizes = var_3118_split_sizes_0, x = normed_161_cast_fp16)[name = string("op_3118")];
	tensor<fp16, [1, 512, 1536]> hidden_states_mlp_13 = mul(x = var_3118_0, y = layers_6_post_feedforward_layernorm_weight)[name = string("hidden_states_mlp_13")];
	tensor<fp16, [1, 512, 1536]> hidden_states_51_cast_fp16 = add(x = x_105_cast_fp16, y = hidden_states_mlp_13)[name = string("hidden_states_51_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_13_begin_0 = const()[name = string("per_layer_slice_13_begin_0"), val = tensor<int32, [3]>([0, 0, 7936])];
	tensor<int32, [3]> per_layer_slice_13_end_0 = const()[name = string("per_layer_slice_13_end_0"), val = tensor<int32, [3]>([1, 512, 8192])];
	tensor<bool, [3]> per_layer_slice_13_end_mask_0 = const()[name = string("per_layer_slice_13_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 512, 256]> per_layer_slice_13_cast_fp16 = slice_by_index(begin = per_layer_slice_13_begin_0, end = per_layer_slice_13_end_0, end_mask = per_layer_slice_13_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_13_cast_fp16")];
	tensor<int32, [3]> var_3146 = const()[name = string("op_3146"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_163_axes_0 = const()[name = string("input_163_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_3147 = transpose(perm = var_3146, x = hidden_states_51_cast_fp16)[name = string("transpose_37")];
	tensor<fp16, [1, 1536, 1, 512]> input_163 = expand_dims(axes = input_163_axes_0, x = var_3147)[name = string("input_163")];
	string gated_37_pad_type_0 = const()[name = string("gated_37_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_37_strides_0 = const()[name = string("gated_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_37_pad_0 = const()[name = string("gated_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_37_dilations_0 = const()[name = string("gated_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_37_groups_0 = const()[name = string("gated_37_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 512]> gated_37 = conv(dilations = gated_37_dilations_0, groups = gated_37_groups_0, pad = gated_37_pad_0, pad_type = gated_37_pad_type_0, strides = gated_37_strides_0, weight = layers_6_per_layer_input_gate_weight_palettized, x = input_163)[name = string("gated_37")];
	string gated_39_mode_0 = const()[name = string("gated_39_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 512]> gated_39 = gelu(mode = gated_39_mode_0, x = gated_37)[name = string("gated_39")];
	tensor<int32, [3]> var_3166 = const()[name = string("op_3166"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_13_axes_0 = const()[name = string("per_layer_slice_conv_13_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 512]> var_3167_cast_fp16 = transpose(perm = var_3166, x = per_layer_slice_13_cast_fp16)[name = string("transpose_36")];
	tensor<fp16, [1, 256, 1, 512]> per_layer_slice_conv_13_cast_fp16 = expand_dims(axes = per_layer_slice_conv_13_axes_0, x = var_3167_cast_fp16)[name = string("per_layer_slice_conv_13_cast_fp16")];
	tensor<fp16, [1, 256, 1, 512]> input_165_cast_fp16 = mul(x = gated_39, y = per_layer_slice_conv_13_cast_fp16)[name = string("input_165_cast_fp16")];
	string gated_41_pad_type_0 = const()[name = string("gated_41_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_41_strides_0 = const()[name = string("gated_41_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_41_pad_0 = const()[name = string("gated_41_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_41_dilations_0 = const()[name = string("gated_41_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_41_groups_0 = const()[name = string("gated_41_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_6_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318152576))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318349248))))[name = string("layers_6_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 512]> gated_41_cast_fp16 = conv(dilations = gated_41_dilations_0, groups = gated_41_groups_0, pad = gated_41_pad_0, pad_type = gated_41_pad_type_0, strides = gated_41_strides_0, weight = layers_6_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_165_cast_fp16)[name = string("gated_41_cast_fp16")];
	tensor<int32, [1]> var_3183_axes_0 = const()[name = string("op_3183_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_3183_cast_fp16 = squeeze(axes = var_3183_axes_0, x = gated_41_cast_fp16)[name = string("op_3183_cast_fp16")];
	tensor<int32, [3]> var_3187 = const()[name = string("op_3187"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3193 = const()[name = string("op_3193"), val = int32(-1)];
	fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_109_cast_fp16 = transpose(perm = var_3187, x = var_3183_cast_fp16)[name = string("transpose_35")];
	tensor<fp16, [1, 512, 1536]> var_3195_cast_fp16 = mul(x = x_109_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_3195_cast_fp16")];
	bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_167_cast_fp16 = concat(axis = var_3193, interleave = input_167_interleave_0, values = (x_109_cast_fp16, var_3195_cast_fp16))[name = string("input_167_cast_fp16")];
	tensor<int32, [1]> normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3190_to_fp16 = const()[name = string("op_3190_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_3190_to_fp16, x = input_167_cast_fp16)[name = string("normed_165_cast_fp16")];
	tensor<int32, [2]> var_3200_split_sizes_0 = const()[name = string("op_3200_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3200_axis_0 = const()[name = string("op_3200_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_3200_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_3200_cast_fp16_1 = split(axis = var_3200_axis_0, split_sizes = var_3200_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_3200_cast_fp16")];
	tensor<fp16, [1536]> layers_6_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318350848)))];
	tensor<fp16, [1, 512, 1536]> hidden_states_55_cast_fp16 = mul(x = var_3200_cast_fp16_0, y = layers_6_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_55_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> hidden_states_57_cast_fp16 = add(x = hidden_states_51_cast_fp16, y = hidden_states_55_cast_fp16)[name = string("hidden_states_57_cast_fp16")];
	tensor<fp16, [1]> const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.a8p-1])];
	tensor<fp16, [1, 512, 1536]> x_111_cast_fp16 = mul(x = hidden_states_57_cast_fp16, y = const_55_promoted_to_fp16)[name = string("x_111_cast_fp16")];
	int32 var_3215 = const()[name = string("op_3215"), val = int32(-1)];
	fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_3217_cast_fp16 = mul(x = x_111_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_3217_cast_fp16")];
	bool input_169_interleave_0 = const()[name = string("input_169_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_169_cast_fp16 = concat(axis = var_3215, interleave = input_169_interleave_0, values = (x_111_cast_fp16, var_3217_cast_fp16))[name = string("input_169_cast_fp16")];
	tensor<int32, [1]> normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3212_to_fp16 = const()[name = string("op_3212_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_3212_to_fp16, x = input_169_cast_fp16)[name = string("normed_169_cast_fp16")];
	tensor<int32, [2]> var_3222_split_sizes_0 = const()[name = string("op_3222_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3222_axis_0 = const()[name = string("op_3222_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_3222_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_3222_cast_fp16_1 = split(axis = var_3222_axis_0, split_sizes = var_3222_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_3222_cast_fp16")];
	tensor<fp16, [1536]> layers_7_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318353984)))];
	tensor<fp16, [1, 512, 1536]> h_43_cast_fp16 = mul(x = var_3222_cast_fp16_0, y = layers_7_input_layernorm_weight_promoted_to_fp16)[name = string("h_43_cast_fp16")];
	tensor<int32, [3]> var_3228 = const()[name = string("op_3228"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_3231_axes_0 = const()[name = string("op_3231_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_3229_cast_fp16 = transpose(perm = var_3228, x = h_43_cast_fp16)[name = string("transpose_34")];
	tensor<fp16, [1, 1536, 1, 512]> var_3231_cast_fp16 = expand_dims(axes = var_3231_axes_0, x = var_3229_cast_fp16)[name = string("op_3231_cast_fp16")];
	string q_raw_15_pad_type_0 = const()[name = string("q_raw_15_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> q_raw_15_strides_0 = const()[name = string("q_raw_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> q_raw_15_pad_0 = const()[name = string("q_raw_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> q_raw_15_dilations_0 = const()[name = string("q_raw_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 q_raw_15_groups_0 = const()[name = string("q_raw_15_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 512]> q_raw_15 = conv(dilations = q_raw_15_dilations_0, groups = q_raw_15_groups_0, pad = q_raw_15_pad_0, pad_type = q_raw_15_pad_type_0, strides = q_raw_15_strides_0, weight = layers_7_self_attn_q_proj_weight_palettized, x = var_3231_cast_fp16)[name = string("q_raw_15")];
	tensor<int32, [4]> var_3252 = const()[name = string("op_3252"), val = tensor<int32, [4]>([1, 8, 256, 512])];
	tensor<fp16, [1, 8, 256, 512]> var_3253 = reshape(shape = var_3252, x = q_raw_15)[name = string("op_3253")];
	tensor<int32, [4]> transpose_54_perm_0 = const()[name = string("transpose_54_perm_0"), val = tensor<int32, [4]>([0, 3, 1, 2])];
	tensor<int32, [3]> var_3276 = const()[name = string("op_3276"), val = tensor<int32, [3]>([512, 8, 256])];
	tensor<fp16, [1, 512, 8, 256]> transpose_54 = transpose(perm = transpose_54_perm_0, x = var_3253)[name = string("transpose_33")];
	tensor<fp16, [512, 8, 256]> x_113 = reshape(shape = var_3276, x = transpose_54)[name = string("x_113")];
	int32 var_3282 = const()[name = string("op_3282"), val = int32(-1)];
	fp16 const_57_promoted = const()[name = string("const_57_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [512, 8, 256]> var_3284 = mul(x = x_113, y = const_57_promoted)[name = string("op_3284")];
	bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)];
	tensor<fp16, [512, 8, 512]> input_173 = concat(axis = var_3282, interleave = input_173_interleave_0, values = (x_113, var_3284))[name = string("input_173")];
	tensor<int32, [1]> normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3279_to_fp16 = const()[name = string("op_3279_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [512, 8, 512]> normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_3279_to_fp16, x = input_173)[name = string("normed_173_cast_fp16")];
	tensor<int32, [2]> var_3289_split_sizes_0 = const()[name = string("op_3289_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_3289_axis_0 = const()[name = string("op_3289_axis_0"), val = int32(-1)];
	tensor<fp16, [512, 8, 256]> var_3289_0, tensor<fp16, [512, 8, 256]> var_3289_1 = split(axis = var_3289_axis_0, split_sizes = var_3289_split_sizes_0, x = normed_173_cast_fp16)[name = string("op_3289")];
	tensor<fp16, [512, 8, 256]> q_59 = mul(x = var_3289_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_59")];
	tensor<int32, [4]> var_3296 = const()[name = string("op_3296"), val = tensor<int32, [4]>([1, 512, 8, 256])];
	tensor<fp16, [1, 512, 8, 256]> var_3297 = reshape(shape = var_3296, x = q_59)[name = string("op_3297")];
	tensor<int32, [4]> var_3302 = const()[name = string("op_3302"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<fp16, [1, 8, 512, 256]> q_61 = transpose(perm = var_3302, x = var_3297)[name = string("transpose_32")];
	tensor<fp16, [1, 8, 512, 256]> var_3304_cast_fp16 = mul(x = q_61, y = cos_s)[name = string("op_3304_cast_fp16")];
	tensor<int32, [2]> var_3305_split_sizes_0 = const()[name = string("op_3305_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_3305_axis_0 = const()[name = string("op_3305_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 512, 128]> var_3305_0, tensor<fp16, [1, 8, 512, 128]> var_3305_1 = split(axis = var_3305_axis_0, split_sizes = var_3305_split_sizes_0, x = q_61)[name = string("op_3305")];
	fp16 const_58_promoted = const()[name = string("const_58_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 512, 128]> var_3307 = mul(x = var_3305_1, y = const_58_promoted)[name = string("op_3307")];
	int32 var_3309 = const()[name = string("op_3309"), val = int32(-1)];
	bool var_3310_interleave_0 = const()[name = string("op_3310_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> var_3310 = concat(axis = var_3309, interleave = var_3310_interleave_0, values = (var_3307, var_3305_0))[name = string("op_3310")];
	tensor<fp16, [1, 8, 512, 256]> var_3311_cast_fp16 = mul(x = var_3310, y = sin_s)[name = string("op_3311_cast_fp16")];
	tensor<fp16, [1, 8, 512, 256]> q_63_cast_fp16 = add(x = var_3304_cast_fp16, y = var_3311_cast_fp16)[name = string("q_63_cast_fp16")];
	bool attn_weights_29_transpose_x_0 = const()[name = string("attn_weights_29_transpose_x_0"), val = bool(false)];
	bool attn_weights_29_transpose_y_0 = const()[name = string("attn_weights_29_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 512]> attn_weights_29_cast_fp16 = matmul(transpose_x = attn_weights_29_transpose_x_0, transpose_y = attn_weights_29_transpose_y_0, x = q_63_cast_fp16, y = transpose_41_cast_fp16)[name = string("attn_weights_29_cast_fp16")];
	tensor<fp16, [1, 8, 512, 512]> x_115_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask)[name = string("x_115_cast_fp16")];
	tensor<int32, [1]> reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> reduce_max_7 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_115_cast_fp16)[name = string("reduce_max_7")];
	tensor<fp16, [1, 8, 512, 512]> var_3343 = sub(x = x_115_cast_fp16, y = reduce_max_7)[name = string("op_3343")];
	tensor<fp16, [1, 8, 512, 512]> var_3349 = exp(x = var_3343)[name = string("op_3349")];
	tensor<int32, [1]> var_3359_axes_0 = const()[name = string("op_3359_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_3359_keep_dims_0 = const()[name = string("op_3359_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> var_3359 = reduce_sum(axes = var_3359_axes_0, keep_dims = var_3359_keep_dims_0, x = var_3349)[name = string("op_3359")];
	tensor<fp16, [1, 8, 512, 512]> var_3365_cast_fp16 = real_div(x = var_3349, y = var_3359)[name = string("op_3365_cast_fp16")];
	bool attn_output_57_transpose_x_0 = const()[name = string("attn_output_57_transpose_x_0"), val = bool(false)];
	bool attn_output_57_transpose_y_0 = const()[name = string("attn_output_57_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> attn_output_57_cast_fp16 = matmul(transpose_x = attn_output_57_transpose_x_0, transpose_y = attn_output_57_transpose_y_0, x = var_3365_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_57_cast_fp16")];
	tensor<int32, [4]> var_3376 = const()[name = string("op_3376"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_3383 = const()[name = string("op_3383"), val = tensor<int32, [3]>([1, 512, 2048])];
	tensor<fp16, [1, 512, 8, 256]> var_3377_cast_fp16 = transpose(perm = var_3376, x = attn_output_57_cast_fp16)[name = string("transpose_31")];
	tensor<fp16, [1, 512, 2048]> attn_output_59_cast_fp16 = reshape(shape = var_3383, x = var_3377_cast_fp16)[name = string("attn_output_59_cast_fp16")];
	tensor<int32, [3]> var_3388 = const()[name = string("op_3388"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_3404_pad_type_0 = const()[name = string("op_3404_pad_type_0"), val = string("valid")];
	int32 var_3404_groups_0 = const()[name = string("op_3404_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_3404_strides_0 = const()[name = string("op_3404_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_3404_pad_0 = const()[name = string("op_3404_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_3404_dilations_0 = const()[name = string("op_3404_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318357120))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319930048))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 2048, 512]> var_3389_cast_fp16 = transpose(perm = var_3388, x = attn_output_59_cast_fp16)[name = string("transpose_30")];
	tensor<fp16, [1, 1536, 512]> var_3404_cast_fp16 = conv(dilations = var_3404_dilations_0, groups = var_3404_groups_0, pad = var_3404_pad_0, pad_type = var_3404_pad_type_0, strides = var_3404_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_3389_cast_fp16)[name = string("op_3404_cast_fp16")];
	tensor<int32, [3]> var_3408 = const()[name = string("op_3408"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3414 = const()[name = string("op_3414"), val = int32(-1)];
	fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_119_cast_fp16 = transpose(perm = var_3408, x = var_3404_cast_fp16)[name = string("transpose_29")];
	tensor<fp16, [1, 512, 1536]> var_3416_cast_fp16 = mul(x = x_119_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_3416_cast_fp16")];
	bool input_177_interleave_0 = const()[name = string("input_177_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_177_cast_fp16 = concat(axis = var_3414, interleave = input_177_interleave_0, values = (x_119_cast_fp16, var_3416_cast_fp16))[name = string("input_177_cast_fp16")];
	tensor<int32, [1]> normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3411_to_fp16 = const()[name = string("op_3411_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_3411_to_fp16, x = input_177_cast_fp16)[name = string("normed_177_cast_fp16")];
	tensor<int32, [2]> var_3421_split_sizes_0 = const()[name = string("op_3421_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3421_axis_0 = const()[name = string("op_3421_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_3421_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_3421_cast_fp16_1 = split(axis = var_3421_axis_0, split_sizes = var_3421_split_sizes_0, x = normed_177_cast_fp16)[name = string("op_3421_cast_fp16")];
	tensor<fp16, [1536]> layers_7_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319931648)))];
	tensor<fp16, [1, 512, 1536]> attn_output_63_cast_fp16 = mul(x = var_3421_cast_fp16_0, y = layers_7_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_63_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> x_121_cast_fp16 = add(x = x_111_cast_fp16, y = attn_output_63_cast_fp16)[name = string("x_121_cast_fp16")];
	int32 var_3430 = const()[name = string("op_3430"), val = int32(-1)];
	fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_3432_cast_fp16 = mul(x = x_121_cast_fp16, y = const_60_promoted_to_fp16)[name = string("op_3432_cast_fp16")];
	bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_179_cast_fp16 = concat(axis = var_3430, interleave = input_179_interleave_0, values = (x_121_cast_fp16, var_3432_cast_fp16))[name = string("input_179_cast_fp16")];
	tensor<int32, [1]> normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3427_to_fp16 = const()[name = string("op_3427_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_3427_to_fp16, x = input_179_cast_fp16)[name = string("normed_181_cast_fp16")];
	tensor<int32, [2]> var_3437_split_sizes_0 = const()[name = string("op_3437_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3437_axis_0 = const()[name = string("op_3437_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_3437_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_3437_cast_fp16_1 = split(axis = var_3437_axis_0, split_sizes = var_3437_split_sizes_0, x = normed_181_cast_fp16)[name = string("op_3437_cast_fp16")];
	tensor<fp16, [1536]> layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319934784)))];
	tensor<fp16, [1, 512, 1536]> h_45_cast_fp16 = mul(x = var_3437_cast_fp16_0, y = layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_45_cast_fp16")];
	tensor<int32, [3]> var_3448 = const()[name = string("op_3448"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_181_axes_0 = const()[name = string("input_181_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_3449 = transpose(perm = var_3448, x = h_45_cast_fp16)[name = string("transpose_28")];
	tensor<fp16, [1, 1536, 1, 512]> input_181 = expand_dims(axes = input_181_axes_0, x = var_3449)[name = string("input_181")];
	string gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> gate_29 = conv(dilations = gate_29_dilations_0, groups = gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = layers_7_mlp_gate_proj_weight_palettized, x = input_181)[name = string("gate_29")];
	string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> up_15 = conv(dilations = up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = layers_7_mlp_up_proj_weight_palettized, x = input_181)[name = string("up_15")];
	string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 512]> gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")];
	tensor<fp16, [1, 12288, 1, 512]> input_183 = mul(x = gate_31, y = up_15)[name = string("input_183")];
	string mlp_out_15_pad_type_0 = const()[name = string("mlp_out_15_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_15_strides_0 = const()[name = string("mlp_out_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_15_pad_0 = const()[name = string("mlp_out_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_15_dilations_0 = const()[name = string("mlp_out_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_15_groups_0 = const()[name = string("mlp_out_15_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 512]> mlp_out_15 = conv(dilations = mlp_out_15_dilations_0, groups = mlp_out_15_groups_0, pad = mlp_out_15_pad_0, pad_type = mlp_out_15_pad_type_0, strides = mlp_out_15_strides_0, weight = layers_7_mlp_down_proj_weight_palettized, x = input_183)[name = string("mlp_out_15")];
	tensor<int32, [1]> var_3489_axes_0 = const()[name = string("op_3489_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_3489 = squeeze(axes = var_3489_axes_0, x = mlp_out_15)[name = string("op_3489")];
	tensor<int32, [3]> var_3493 = const()[name = string("op_3493"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3499 = const()[name = string("op_3499"), val = int32(-1)];
	fp16 const_61_promoted = const()[name = string("const_61_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_123 = transpose(perm = var_3493, x = var_3489)[name = string("transpose_27")];
	tensor<fp16, [1, 512, 1536]> var_3501 = mul(x = x_123, y = const_61_promoted)[name = string("op_3501")];
	bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_185 = concat(axis = var_3499, interleave = input_185_interleave_0, values = (x_123, var_3501))[name = string("input_185")];
	tensor<int32, [1]> normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3496_to_fp16 = const()[name = string("op_3496_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_3496_to_fp16, x = input_185)[name = string("normed_185_cast_fp16")];
	tensor<int32, [2]> var_3506_split_sizes_0 = const()[name = string("op_3506_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3506_axis_0 = const()[name = string("op_3506_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_3506_0, tensor<fp16, [1, 512, 1536]> var_3506_1 = split(axis = var_3506_axis_0, split_sizes = var_3506_split_sizes_0, x = normed_185_cast_fp16)[name = string("op_3506")];
	tensor<fp16, [1, 512, 1536]> hidden_states_mlp_15 = mul(x = var_3506_0, y = layers_7_post_feedforward_layernorm_weight)[name = string("hidden_states_mlp_15")];
	tensor<fp16, [1, 512, 1536]> hidden_states_59_cast_fp16 = add(x = x_121_cast_fp16, y = hidden_states_mlp_15)[name = string("hidden_states_59_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_15_begin_0 = const()[name = string("per_layer_slice_15_begin_0"), val = tensor<int32, [3]>([0, 0, 8192])];
	tensor<int32, [3]> per_layer_slice_15_end_0 = const()[name = string("per_layer_slice_15_end_0"), val = tensor<int32, [3]>([1, 512, 8448])];
	tensor<bool, [3]> per_layer_slice_15_end_mask_0 = const()[name = string("per_layer_slice_15_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 512, 256]> per_layer_slice_15_cast_fp16 = slice_by_index(begin = per_layer_slice_15_begin_0, end = per_layer_slice_15_end_0, end_mask = per_layer_slice_15_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_15_cast_fp16")];
	tensor<int32, [3]> var_3534 = const()[name = string("op_3534"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_187_axes_0 = const()[name = string("input_187_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_3535 = transpose(perm = var_3534, x = hidden_states_59_cast_fp16)[name = string("transpose_26")];
	tensor<fp16, [1, 1536, 1, 512]> input_187 = expand_dims(axes = input_187_axes_0, x = var_3535)[name = string("input_187")];
	string gated_43_pad_type_0 = const()[name = string("gated_43_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_43_strides_0 = const()[name = string("gated_43_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_43_pad_0 = const()[name = string("gated_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_43_dilations_0 = const()[name = string("gated_43_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_43_groups_0 = const()[name = string("gated_43_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 512]> gated_43 = conv(dilations = gated_43_dilations_0, groups = gated_43_groups_0, pad = gated_43_pad_0, pad_type = gated_43_pad_type_0, strides = gated_43_strides_0, weight = layers_7_per_layer_input_gate_weight_palettized, x = input_187)[name = string("gated_43")];
	string gated_45_mode_0 = const()[name = string("gated_45_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 512]> gated_45 = gelu(mode = gated_45_mode_0, x = gated_43)[name = string("gated_45")];
	tensor<int32, [3]> var_3554 = const()[name = string("op_3554"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_15_axes_0 = const()[name = string("per_layer_slice_conv_15_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 512]> var_3555_cast_fp16 = transpose(perm = var_3554, x = per_layer_slice_15_cast_fp16)[name = string("transpose_25")];
	tensor<fp16, [1, 256, 1, 512]> per_layer_slice_conv_15_cast_fp16 = expand_dims(axes = per_layer_slice_conv_15_axes_0, x = var_3555_cast_fp16)[name = string("per_layer_slice_conv_15_cast_fp16")];
	tensor<fp16, [1, 256, 1, 512]> input_189_cast_fp16 = mul(x = gated_45, y = per_layer_slice_conv_15_cast_fp16)[name = string("input_189_cast_fp16")];
	string gated_47_pad_type_0 = const()[name = string("gated_47_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_47_strides_0 = const()[name = string("gated_47_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_47_pad_0 = const()[name = string("gated_47_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_47_dilations_0 = const()[name = string("gated_47_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_47_groups_0 = const()[name = string("gated_47_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_7_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319937920))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320134592))))[name = string("layers_7_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 512]> gated_47_cast_fp16 = conv(dilations = gated_47_dilations_0, groups = gated_47_groups_0, pad = gated_47_pad_0, pad_type = gated_47_pad_type_0, strides = gated_47_strides_0, weight = layers_7_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_189_cast_fp16)[name = string("gated_47_cast_fp16")];
	tensor<int32, [1]> var_3571_axes_0 = const()[name = string("op_3571_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_3571_cast_fp16 = squeeze(axes = var_3571_axes_0, x = gated_47_cast_fp16)[name = string("op_3571_cast_fp16")];
	tensor<int32, [3]> var_3575 = const()[name = string("op_3575"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3581 = const()[name = string("op_3581"), val = int32(-1)];
	fp16 const_62_promoted_to_fp16 = const()[name = string("const_62_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_125_cast_fp16 = transpose(perm = var_3575, x = var_3571_cast_fp16)[name = string("transpose_24")];
	tensor<fp16, [1, 512, 1536]> var_3583_cast_fp16 = mul(x = x_125_cast_fp16, y = const_62_promoted_to_fp16)[name = string("op_3583_cast_fp16")];
	bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_191_cast_fp16 = concat(axis = var_3581, interleave = input_191_interleave_0, values = (x_125_cast_fp16, var_3583_cast_fp16))[name = string("input_191_cast_fp16")];
	tensor<int32, [1]> normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3578_to_fp16 = const()[name = string("op_3578_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_3578_to_fp16, x = input_191_cast_fp16)[name = string("normed_189_cast_fp16")];
	tensor<int32, [2]> var_3588_split_sizes_0 = const()[name = string("op_3588_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3588_axis_0 = const()[name = string("op_3588_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_3588_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_3588_cast_fp16_1 = split(axis = var_3588_axis_0, split_sizes = var_3588_split_sizes_0, x = normed_189_cast_fp16)[name = string("op_3588_cast_fp16")];
	tensor<fp16, [1536]> layers_7_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320136192)))];
	tensor<fp16, [1, 512, 1536]> hidden_states_63_cast_fp16 = mul(x = var_3588_cast_fp16_0, y = layers_7_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_63_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> hidden_states_65_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = hidden_states_63_cast_fp16)[name = string("hidden_states_65_cast_fp16")];
	tensor<fp16, [1]> const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.bep-1])];
	tensor<fp16, [1, 512, 1536]> x_127_cast_fp16 = mul(x = hidden_states_65_cast_fp16, y = const_63_promoted_to_fp16)[name = string("x_127_cast_fp16")];
	int32 var_3603 = const()[name = string("op_3603"), val = int32(-1)];
	fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_3605_cast_fp16 = mul(x = x_127_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_3605_cast_fp16")];
	bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_193_cast_fp16 = concat(axis = var_3603, interleave = input_193_interleave_0, values = (x_127_cast_fp16, var_3605_cast_fp16))[name = string("input_193_cast_fp16")];
	tensor<int32, [1]> normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3600_to_fp16 = const()[name = string("op_3600_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_3600_to_fp16, x = input_193_cast_fp16)[name = string("normed_193_cast_fp16")];
	tensor<int32, [2]> var_3610_split_sizes_0 = const()[name = string("op_3610_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3610_axis_0 = const()[name = string("op_3610_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_3610_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_3610_cast_fp16_1 = split(axis = var_3610_axis_0, split_sizes = var_3610_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_3610_cast_fp16")];
	tensor<fp16, [1536]> layers_8_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320139328)))];
	tensor<fp16, [1, 512, 1536]> h_49_cast_fp16 = mul(x = var_3610_cast_fp16_0, y = layers_8_input_layernorm_weight_promoted_to_fp16)[name = string("h_49_cast_fp16")];
	tensor<int32, [3]> var_3616 = const()[name = string("op_3616"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_3619_axes_0 = const()[name = string("op_3619_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_3617_cast_fp16 = transpose(perm = var_3616, x = h_49_cast_fp16)[name = string("transpose_23")];
	tensor<fp16, [1, 1536, 1, 512]> var_3619_cast_fp16 = expand_dims(axes = var_3619_axes_0, x = var_3617_cast_fp16)[name = string("op_3619_cast_fp16")];
	string q_raw_17_pad_type_0 = const()[name = string("q_raw_17_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> q_raw_17_strides_0 = const()[name = string("q_raw_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> q_raw_17_pad_0 = const()[name = string("q_raw_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> q_raw_17_dilations_0 = const()[name = string("q_raw_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 q_raw_17_groups_0 = const()[name = string("q_raw_17_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 512]> q_raw_17 = conv(dilations = q_raw_17_dilations_0, groups = q_raw_17_groups_0, pad = q_raw_17_pad_0, pad_type = q_raw_17_pad_type_0, strides = q_raw_17_strides_0, weight = layers_8_self_attn_q_proj_weight_palettized, x = var_3619_cast_fp16)[name = string("q_raw_17")];
	tensor<int32, [4]> var_3640 = const()[name = string("op_3640"), val = tensor<int32, [4]>([1, 8, 256, 512])];
	tensor<fp16, [1, 8, 256, 512]> var_3641 = reshape(shape = var_3640, x = q_raw_17)[name = string("op_3641")];
	tensor<int32, [4]> transpose_56_perm_0 = const()[name = string("transpose_56_perm_0"), val = tensor<int32, [4]>([0, 3, 1, 2])];
	tensor<int32, [3]> var_3664 = const()[name = string("op_3664"), val = tensor<int32, [3]>([512, 8, 256])];
	tensor<fp16, [1, 512, 8, 256]> transpose_56 = transpose(perm = transpose_56_perm_0, x = var_3641)[name = string("transpose_22")];
	tensor<fp16, [512, 8, 256]> x_129 = reshape(shape = var_3664, x = transpose_56)[name = string("x_129")];
	int32 var_3670 = const()[name = string("op_3670"), val = int32(-1)];
	fp16 const_65_promoted = const()[name = string("const_65_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [512, 8, 256]> var_3672 = mul(x = x_129, y = const_65_promoted)[name = string("op_3672")];
	bool input_197_interleave_0 = const()[name = string("input_197_interleave_0"), val = bool(false)];
	tensor<fp16, [512, 8, 512]> input_197 = concat(axis = var_3670, interleave = input_197_interleave_0, values = (x_129, var_3672))[name = string("input_197")];
	tensor<int32, [1]> normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3667_to_fp16 = const()[name = string("op_3667_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [512, 8, 512]> normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_3667_to_fp16, x = input_197)[name = string("normed_197_cast_fp16")];
	tensor<int32, [2]> var_3677_split_sizes_0 = const()[name = string("op_3677_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_3677_axis_0 = const()[name = string("op_3677_axis_0"), val = int32(-1)];
	tensor<fp16, [512, 8, 256]> var_3677_0, tensor<fp16, [512, 8, 256]> var_3677_1 = split(axis = var_3677_axis_0, split_sizes = var_3677_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_3677")];
	tensor<fp16, [512, 8, 256]> q_67 = mul(x = var_3677_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_67")];
	tensor<int32, [4]> var_3684 = const()[name = string("op_3684"), val = tensor<int32, [4]>([1, 512, 8, 256])];
	tensor<fp16, [1, 512, 8, 256]> var_3685 = reshape(shape = var_3684, x = q_67)[name = string("op_3685")];
	tensor<int32, [4]> var_3690 = const()[name = string("op_3690"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<fp16, [1, 8, 512, 256]> q_69 = transpose(perm = var_3690, x = var_3685)[name = string("transpose_21")];
	tensor<fp16, [1, 8, 512, 256]> var_3692_cast_fp16 = mul(x = q_69, y = cos_s)[name = string("op_3692_cast_fp16")];
	tensor<int32, [2]> var_3693_split_sizes_0 = const()[name = string("op_3693_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_3693_axis_0 = const()[name = string("op_3693_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 512, 128]> var_3693_0, tensor<fp16, [1, 8, 512, 128]> var_3693_1 = split(axis = var_3693_axis_0, split_sizes = var_3693_split_sizes_0, x = q_69)[name = string("op_3693")];
	fp16 const_66_promoted = const()[name = string("const_66_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 512, 128]> var_3695 = mul(x = var_3693_1, y = const_66_promoted)[name = string("op_3695")];
	int32 var_3697 = const()[name = string("op_3697"), val = int32(-1)];
	bool var_3698_interleave_0 = const()[name = string("op_3698_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> var_3698 = concat(axis = var_3697, interleave = var_3698_interleave_0, values = (var_3695, var_3693_0))[name = string("op_3698")];
	tensor<fp16, [1, 8, 512, 256]> var_3699_cast_fp16 = mul(x = var_3698, y = sin_s)[name = string("op_3699_cast_fp16")];
	tensor<fp16, [1, 8, 512, 256]> q_71_cast_fp16 = add(x = var_3692_cast_fp16, y = var_3699_cast_fp16)[name = string("q_71_cast_fp16")];
	bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)];
	bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 512]> attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = q_71_cast_fp16, y = transpose_41_cast_fp16)[name = string("attn_weights_33_cast_fp16")];
	tensor<fp16, [1, 8, 512, 512]> x_131_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask)[name = string("x_131_cast_fp16")];
	tensor<int32, [1]> reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> reduce_max_8 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_131_cast_fp16)[name = string("reduce_max_8")];
	tensor<fp16, [1, 8, 512, 512]> var_3731 = sub(x = x_131_cast_fp16, y = reduce_max_8)[name = string("op_3731")];
	tensor<fp16, [1, 8, 512, 512]> var_3737 = exp(x = var_3731)[name = string("op_3737")];
	tensor<int32, [1]> var_3747_axes_0 = const()[name = string("op_3747_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_3747_keep_dims_0 = const()[name = string("op_3747_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> var_3747 = reduce_sum(axes = var_3747_axes_0, keep_dims = var_3747_keep_dims_0, x = var_3737)[name = string("op_3747")];
	tensor<fp16, [1, 8, 512, 512]> var_3753_cast_fp16 = real_div(x = var_3737, y = var_3747)[name = string("op_3753_cast_fp16")];
	bool attn_output_65_transpose_x_0 = const()[name = string("attn_output_65_transpose_x_0"), val = bool(false)];
	bool attn_output_65_transpose_y_0 = const()[name = string("attn_output_65_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> attn_output_65_cast_fp16 = matmul(transpose_x = attn_output_65_transpose_x_0, transpose_y = attn_output_65_transpose_y_0, x = var_3753_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_65_cast_fp16")];
	tensor<int32, [4]> var_3764 = const()[name = string("op_3764"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_3771 = const()[name = string("op_3771"), val = tensor<int32, [3]>([1, 512, 2048])];
	tensor<fp16, [1, 512, 8, 256]> var_3765_cast_fp16 = transpose(perm = var_3764, x = attn_output_65_cast_fp16)[name = string("transpose_20")];
	tensor<fp16, [1, 512, 2048]> attn_output_67_cast_fp16 = reshape(shape = var_3771, x = var_3765_cast_fp16)[name = string("attn_output_67_cast_fp16")];
	tensor<int32, [3]> var_3776 = const()[name = string("op_3776"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_3792_pad_type_0 = const()[name = string("op_3792_pad_type_0"), val = string("valid")];
	int32 var_3792_groups_0 = const()[name = string("op_3792_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_3792_strides_0 = const()[name = string("op_3792_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_3792_pad_0 = const()[name = string("op_3792_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_3792_dilations_0 = const()[name = string("op_3792_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320142464))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321715392))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 2048, 512]> var_3777_cast_fp16 = transpose(perm = var_3776, x = attn_output_67_cast_fp16)[name = string("transpose_19")];
	tensor<fp16, [1, 1536, 512]> var_3792_cast_fp16 = conv(dilations = var_3792_dilations_0, groups = var_3792_groups_0, pad = var_3792_pad_0, pad_type = var_3792_pad_type_0, strides = var_3792_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_3777_cast_fp16)[name = string("op_3792_cast_fp16")];
	tensor<int32, [3]> var_3796 = const()[name = string("op_3796"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3802 = const()[name = string("op_3802"), val = int32(-1)];
	fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_135_cast_fp16 = transpose(perm = var_3796, x = var_3792_cast_fp16)[name = string("transpose_18")];
	tensor<fp16, [1, 512, 1536]> var_3804_cast_fp16 = mul(x = x_135_cast_fp16, y = const_67_promoted_to_fp16)[name = string("op_3804_cast_fp16")];
	bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_201_cast_fp16 = concat(axis = var_3802, interleave = input_201_interleave_0, values = (x_135_cast_fp16, var_3804_cast_fp16))[name = string("input_201_cast_fp16")];
	tensor<int32, [1]> normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3799_to_fp16 = const()[name = string("op_3799_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_3799_to_fp16, x = input_201_cast_fp16)[name = string("normed_201_cast_fp16")];
	tensor<int32, [2]> var_3809_split_sizes_0 = const()[name = string("op_3809_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3809_axis_0 = const()[name = string("op_3809_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_3809_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_3809_cast_fp16_1 = split(axis = var_3809_axis_0, split_sizes = var_3809_split_sizes_0, x = normed_201_cast_fp16)[name = string("op_3809_cast_fp16")];
	tensor<fp16, [1536]> layers_8_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321716992)))];
	tensor<fp16, [1, 512, 1536]> attn_output_71_cast_fp16 = mul(x = var_3809_cast_fp16_0, y = layers_8_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_71_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> x_137_cast_fp16 = add(x = x_127_cast_fp16, y = attn_output_71_cast_fp16)[name = string("x_137_cast_fp16")];
	int32 var_3818 = const()[name = string("op_3818"), val = int32(-1)];
	fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_3820_cast_fp16 = mul(x = x_137_cast_fp16, y = const_68_promoted_to_fp16)[name = string("op_3820_cast_fp16")];
	bool input_203_interleave_0 = const()[name = string("input_203_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_203_cast_fp16 = concat(axis = var_3818, interleave = input_203_interleave_0, values = (x_137_cast_fp16, var_3820_cast_fp16))[name = string("input_203_cast_fp16")];
	tensor<int32, [1]> normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3815_to_fp16 = const()[name = string("op_3815_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_3815_to_fp16, x = input_203_cast_fp16)[name = string("normed_205_cast_fp16")];
	tensor<int32, [2]> var_3825_split_sizes_0 = const()[name = string("op_3825_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3825_axis_0 = const()[name = string("op_3825_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_3825_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_3825_cast_fp16_1 = split(axis = var_3825_axis_0, split_sizes = var_3825_split_sizes_0, x = normed_205_cast_fp16)[name = string("op_3825_cast_fp16")];
	tensor<fp16, [1536]> layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321720128)))];
	tensor<fp16, [1, 512, 1536]> h_51_cast_fp16 = mul(x = var_3825_cast_fp16_0, y = layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_51_cast_fp16")];
	tensor<int32, [3]> var_3836 = const()[name = string("op_3836"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_205_axes_0 = const()[name = string("input_205_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_3837 = transpose(perm = var_3836, x = h_51_cast_fp16)[name = string("transpose_17")];
	tensor<fp16, [1, 1536, 1, 512]> input_205 = expand_dims(axes = input_205_axes_0, x = var_3837)[name = string("input_205")];
	string gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = layers_8_mlp_gate_proj_weight_palettized, x = input_205)[name = string("gate_33")];
	string up_17_pad_type_0 = const()[name = string("up_17_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_17_strides_0 = const()[name = string("up_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_17_pad_0 = const()[name = string("up_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_17_dilations_0 = const()[name = string("up_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_17_groups_0 = const()[name = string("up_17_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> up_17 = conv(dilations = up_17_dilations_0, groups = up_17_groups_0, pad = up_17_pad_0, pad_type = up_17_pad_type_0, strides = up_17_strides_0, weight = layers_8_mlp_up_proj_weight_palettized, x = input_205)[name = string("up_17")];
	string gate_35_mode_0 = const()[name = string("gate_35_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 512]> gate_35 = gelu(mode = gate_35_mode_0, x = gate_33)[name = string("gate_35")];
	tensor<fp16, [1, 12288, 1, 512]> input_207 = mul(x = gate_35, y = up_17)[name = string("input_207")];
	string mlp_out_17_pad_type_0 = const()[name = string("mlp_out_17_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_17_strides_0 = const()[name = string("mlp_out_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_17_pad_0 = const()[name = string("mlp_out_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_17_dilations_0 = const()[name = string("mlp_out_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_17_groups_0 = const()[name = string("mlp_out_17_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 512]> mlp_out_17 = conv(dilations = mlp_out_17_dilations_0, groups = mlp_out_17_groups_0, pad = mlp_out_17_pad_0, pad_type = mlp_out_17_pad_type_0, strides = mlp_out_17_strides_0, weight = layers_8_mlp_down_proj_weight_palettized, x = input_207)[name = string("mlp_out_17")];
	tensor<int32, [1]> var_3877_axes_0 = const()[name = string("op_3877_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_3877 = squeeze(axes = var_3877_axes_0, x = mlp_out_17)[name = string("op_3877")];
	tensor<int32, [3]> var_3881 = const()[name = string("op_3881"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3887 = const()[name = string("op_3887"), val = int32(-1)];
	fp16 const_69_promoted = const()[name = string("const_69_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_139 = transpose(perm = var_3881, x = var_3877)[name = string("transpose_16")];
	tensor<fp16, [1, 512, 1536]> var_3889 = mul(x = x_139, y = const_69_promoted)[name = string("op_3889")];
	bool input_209_interleave_0 = const()[name = string("input_209_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_209 = concat(axis = var_3887, interleave = input_209_interleave_0, values = (x_139, var_3889))[name = string("input_209")];
	tensor<int32, [1]> normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3884_to_fp16 = const()[name = string("op_3884_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_3884_to_fp16, x = input_209)[name = string("normed_209_cast_fp16")];
	tensor<int32, [2]> var_3894_split_sizes_0 = const()[name = string("op_3894_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3894_axis_0 = const()[name = string("op_3894_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_3894_0, tensor<fp16, [1, 512, 1536]> var_3894_1 = split(axis = var_3894_axis_0, split_sizes = var_3894_split_sizes_0, x = normed_209_cast_fp16)[name = string("op_3894")];
	tensor<fp16, [1, 512, 1536]> hidden_states_mlp_17 = mul(x = var_3894_0, y = layers_8_post_feedforward_layernorm_weight)[name = string("hidden_states_mlp_17")];
	tensor<fp16, [1, 512, 1536]> hidden_states_67_cast_fp16 = add(x = x_137_cast_fp16, y = hidden_states_mlp_17)[name = string("hidden_states_67_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_17_begin_0 = const()[name = string("per_layer_slice_17_begin_0"), val = tensor<int32, [3]>([0, 0, 8448])];
	tensor<int32, [3]> per_layer_slice_17_end_0 = const()[name = string("per_layer_slice_17_end_0"), val = tensor<int32, [3]>([1, 512, 8704])];
	tensor<bool, [3]> per_layer_slice_17_end_mask_0 = const()[name = string("per_layer_slice_17_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 512, 256]> per_layer_slice_17_cast_fp16 = slice_by_index(begin = per_layer_slice_17_begin_0, end = per_layer_slice_17_end_0, end_mask = per_layer_slice_17_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_17_cast_fp16")];
	tensor<int32, [3]> var_3922 = const()[name = string("op_3922"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_211_axes_0 = const()[name = string("input_211_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_3923 = transpose(perm = var_3922, x = hidden_states_67_cast_fp16)[name = string("transpose_15")];
	tensor<fp16, [1, 1536, 1, 512]> input_211 = expand_dims(axes = input_211_axes_0, x = var_3923)[name = string("input_211")];
	string gated_49_pad_type_0 = const()[name = string("gated_49_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_49_strides_0 = const()[name = string("gated_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_49_pad_0 = const()[name = string("gated_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_49_dilations_0 = const()[name = string("gated_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_49_groups_0 = const()[name = string("gated_49_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 512]> gated_49 = conv(dilations = gated_49_dilations_0, groups = gated_49_groups_0, pad = gated_49_pad_0, pad_type = gated_49_pad_type_0, strides = gated_49_strides_0, weight = layers_8_per_layer_input_gate_weight_palettized, x = input_211)[name = string("gated_49")];
	string gated_51_mode_0 = const()[name = string("gated_51_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 512]> gated_51 = gelu(mode = gated_51_mode_0, x = gated_49)[name = string("gated_51")];
	tensor<int32, [3]> var_3942 = const()[name = string("op_3942"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_17_axes_0 = const()[name = string("per_layer_slice_conv_17_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 512]> var_3943_cast_fp16 = transpose(perm = var_3942, x = per_layer_slice_17_cast_fp16)[name = string("transpose_14")];
	tensor<fp16, [1, 256, 1, 512]> per_layer_slice_conv_17_cast_fp16 = expand_dims(axes = per_layer_slice_conv_17_axes_0, x = var_3943_cast_fp16)[name = string("per_layer_slice_conv_17_cast_fp16")];
	tensor<fp16, [1, 256, 1, 512]> input_213_cast_fp16 = mul(x = gated_51, y = per_layer_slice_conv_17_cast_fp16)[name = string("input_213_cast_fp16")];
	string gated_53_pad_type_0 = const()[name = string("gated_53_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_53_strides_0 = const()[name = string("gated_53_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_53_pad_0 = const()[name = string("gated_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_53_dilations_0 = const()[name = string("gated_53_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_53_groups_0 = const()[name = string("gated_53_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_8_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321723264))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321919936))))[name = string("layers_8_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 512]> gated_53_cast_fp16 = conv(dilations = gated_53_dilations_0, groups = gated_53_groups_0, pad = gated_53_pad_0, pad_type = gated_53_pad_type_0, strides = gated_53_strides_0, weight = layers_8_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_213_cast_fp16)[name = string("gated_53_cast_fp16")];
	tensor<int32, [1]> var_3959_axes_0 = const()[name = string("op_3959_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_3959_cast_fp16 = squeeze(axes = var_3959_axes_0, x = gated_53_cast_fp16)[name = string("op_3959_cast_fp16")];
	tensor<int32, [3]> var_3963 = const()[name = string("op_3963"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3969 = const()[name = string("op_3969"), val = int32(-1)];
	fp16 const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_141_cast_fp16 = transpose(perm = var_3963, x = var_3959_cast_fp16)[name = string("transpose_13")];
	tensor<fp16, [1, 512, 1536]> var_3971_cast_fp16 = mul(x = x_141_cast_fp16, y = const_70_promoted_to_fp16)[name = string("op_3971_cast_fp16")];
	bool input_215_interleave_0 = const()[name = string("input_215_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_215_cast_fp16 = concat(axis = var_3969, interleave = input_215_interleave_0, values = (x_141_cast_fp16, var_3971_cast_fp16))[name = string("input_215_cast_fp16")];
	tensor<int32, [1]> normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3966_to_fp16 = const()[name = string("op_3966_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_3966_to_fp16, x = input_215_cast_fp16)[name = string("normed_213_cast_fp16")];
	tensor<int32, [2]> var_3976_split_sizes_0 = const()[name = string("op_3976_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3976_axis_0 = const()[name = string("op_3976_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_3976_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_3976_cast_fp16_1 = split(axis = var_3976_axis_0, split_sizes = var_3976_split_sizes_0, x = normed_213_cast_fp16)[name = string("op_3976_cast_fp16")];
	tensor<fp16, [1536]> layers_8_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321921536)))];
	tensor<fp16, [1, 512, 1536]> hidden_states_71_cast_fp16 = mul(x = var_3976_cast_fp16_0, y = layers_8_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_71_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> hidden_states_73_cast_fp16 = add(x = hidden_states_67_cast_fp16, y = hidden_states_71_cast_fp16)[name = string("hidden_states_73_cast_fp16")];
	tensor<fp16, [1]> const_71_promoted_to_fp16 = const()[name = string("const_71_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.64p-1])];
	tensor<fp16, [1, 512, 1536]> x_143_cast_fp16 = mul(x = hidden_states_73_cast_fp16, y = const_71_promoted_to_fp16)[name = string("x_143_cast_fp16")];
	int32 var_3991 = const()[name = string("op_3991"), val = int32(-1)];
	fp16 const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_3993_cast_fp16 = mul(x = x_143_cast_fp16, y = const_72_promoted_to_fp16)[name = string("op_3993_cast_fp16")];
	bool input_217_interleave_0 = const()[name = string("input_217_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_217_cast_fp16 = concat(axis = var_3991, interleave = input_217_interleave_0, values = (x_143_cast_fp16, var_3993_cast_fp16))[name = string("input_217_cast_fp16")];
	tensor<int32, [1]> normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3988_to_fp16 = const()[name = string("op_3988_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_3988_to_fp16, x = input_217_cast_fp16)[name = string("normed_217_cast_fp16")];
	tensor<int32, [2]> var_3998_split_sizes_0 = const()[name = string("op_3998_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3998_axis_0 = const()[name = string("op_3998_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_3998_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_3998_cast_fp16_1 = split(axis = var_3998_axis_0, split_sizes = var_3998_split_sizes_0, x = normed_217_cast_fp16)[name = string("op_3998_cast_fp16")];
	tensor<fp16, [1536]> layers_9_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321924672)))];
	tensor<fp16, [1, 512, 1536]> h_55_cast_fp16 = mul(x = var_3998_cast_fp16_0, y = layers_9_input_layernorm_weight_promoted_to_fp16)[name = string("h_55_cast_fp16")];
	tensor<int32, [3]> var_4004 = const()[name = string("op_4004"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_4007_axes_0 = const()[name = string("op_4007_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_4005_cast_fp16 = transpose(perm = var_4004, x = h_55_cast_fp16)[name = string("transpose_12")];
	tensor<fp16, [1, 1536, 1, 512]> var_4007_cast_fp16 = expand_dims(axes = var_4007_axes_0, x = var_4005_cast_fp16)[name = string("op_4007_cast_fp16")];
	string q_raw_pad_type_0 = const()[name = string("q_raw_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> q_raw_strides_0 = const()[name = string("q_raw_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> q_raw_pad_0 = const()[name = string("q_raw_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> q_raw_dilations_0 = const()[name = string("q_raw_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 q_raw_groups_0 = const()[name = string("q_raw_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4096, 1, 512]> q_raw = conv(dilations = q_raw_dilations_0, groups = q_raw_groups_0, pad = q_raw_pad_0, pad_type = q_raw_pad_type_0, strides = q_raw_strides_0, weight = layers_9_self_attn_q_proj_weight_palettized, x = var_4007_cast_fp16)[name = string("q_raw")];
	tensor<int32, [4]> var_4028 = const()[name = string("op_4028"), val = tensor<int32, [4]>([1, 8, 512, 512])];
	tensor<fp16, [1, 8, 512, 512]> var_4029 = reshape(shape = var_4028, x = q_raw)[name = string("op_4029")];
	tensor<int32, [4]> transpose_58_perm_0 = const()[name = string("transpose_58_perm_0"), val = tensor<int32, [4]>([0, 3, 1, 2])];
	tensor<int32, [3]> var_4052 = const()[name = string("op_4052"), val = tensor<int32, [3]>([512, 8, 512])];
	tensor<fp16, [1, 512, 8, 512]> transpose_58 = transpose(perm = transpose_58_perm_0, x = var_4029)[name = string("transpose_11")];
	tensor<fp16, [512, 8, 512]> x_145 = reshape(shape = var_4052, x = transpose_58)[name = string("x_145")];
	int32 var_4058 = const()[name = string("op_4058"), val = int32(-1)];
	fp16 const_73_promoted = const()[name = string("const_73_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [512, 8, 512]> var_4060 = mul(x = x_145, y = const_73_promoted)[name = string("op_4060")];
	bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)];
	tensor<fp16, [512, 8, 1024]> input_221 = concat(axis = var_4058, interleave = input_221_interleave_0, values = (x_145, var_4060))[name = string("input_221")];
	tensor<int32, [1]> normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4055_to_fp16 = const()[name = string("op_4055_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [512, 8, 1024]> normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_4055_to_fp16, x = input_221)[name = string("normed_221_cast_fp16")];
	tensor<int32, [2]> var_4065_split_sizes_0 = const()[name = string("op_4065_split_sizes_0"), val = tensor<int32, [2]>([512, 512])];
	int32 var_4065_axis_0 = const()[name = string("op_4065_axis_0"), val = int32(-1)];
	tensor<fp16, [512, 8, 512]> var_4065_0, tensor<fp16, [512, 8, 512]> var_4065_1 = split(axis = var_4065_axis_0, split_sizes = var_4065_split_sizes_0, x = normed_221_cast_fp16)[name = string("op_4065")];
	tensor<fp16, [512, 8, 512]> q_75 = mul(x = var_4065_0, y = layers_4_self_attn_q_norm_weight)[name = string("q_75")];
	tensor<int32, [4]> var_4072 = const()[name = string("op_4072"), val = tensor<int32, [4]>([1, 512, 8, 512])];
	tensor<fp16, [1, 512, 8, 512]> var_4073 = reshape(shape = var_4072, x = q_75)[name = string("op_4073")];
	tensor<int32, [4]> var_4078 = const()[name = string("op_4078"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<fp16, [1, 8, 512, 512]> q_77 = transpose(perm = var_4078, x = var_4073)[name = string("transpose_10")];
	tensor<fp16, [1, 8, 512, 512]> var_4080_cast_fp16 = mul(x = q_77, y = cos_f)[name = string("op_4080_cast_fp16")];
	tensor<int32, [2]> var_4081_split_sizes_0 = const()[name = string("op_4081_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_4081_axis_0 = const()[name = string("op_4081_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 512, 256]> var_4081_0, tensor<fp16, [1, 8, 512, 256]> var_4081_1 = split(axis = var_4081_axis_0, split_sizes = var_4081_split_sizes_0, x = q_77)[name = string("op_4081")];
	fp16 const_74_promoted = const()[name = string("const_74_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 512, 256]> var_4083 = mul(x = var_4081_1, y = const_74_promoted)[name = string("op_4083")];
	int32 var_4085 = const()[name = string("op_4085"), val = int32(-1)];
	bool var_4086_interleave_0 = const()[name = string("op_4086_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 512]> var_4086 = concat(axis = var_4085, interleave = var_4086_interleave_0, values = (var_4083, var_4081_0))[name = string("op_4086")];
	tensor<fp16, [1, 8, 512, 512]> var_4087_cast_fp16 = mul(x = var_4086, y = sin_f)[name = string("op_4087_cast_fp16")];
	tensor<fp16, [1, 8, 512, 512]> q_cast_fp16 = add(x = var_4080_cast_fp16, y = var_4087_cast_fp16)[name = string("q_cast_fp16")];
	bool attn_weights_37_transpose_x_0 = const()[name = string("attn_weights_37_transpose_x_0"), val = bool(false)];
	bool attn_weights_37_transpose_y_0 = const()[name = string("attn_weights_37_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 512]> attn_weights_37_cast_fp16 = matmul(transpose_x = attn_weights_37_transpose_x_0, transpose_y = attn_weights_37_transpose_y_0, x = q_cast_fp16, y = transpose_49_cast_fp16)[name = string("attn_weights_37_cast_fp16")];
	tensor<fp16, [1, 8, 512, 512]> x_147_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask)[name = string("x_147_cast_fp16")];
	tensor<int32, [1]> reduce_max_9_axes_0 = const()[name = string("reduce_max_9_axes_0"), val = tensor<int32, [1]>([-1])];
	bool reduce_max_9_keep_dims_0 = const()[name = string("reduce_max_9_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> reduce_max_9 = reduce_max(axes = reduce_max_9_axes_0, keep_dims = reduce_max_9_keep_dims_0, x = x_147_cast_fp16)[name = string("reduce_max_9")];
	tensor<fp16, [1, 8, 512, 512]> var_4119 = sub(x = x_147_cast_fp16, y = reduce_max_9)[name = string("op_4119")];
	tensor<fp16, [1, 8, 512, 512]> var_4125 = exp(x = var_4119)[name = string("op_4125")];
	tensor<int32, [1]> var_4135_axes_0 = const()[name = string("op_4135_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_4135_keep_dims_0 = const()[name = string("op_4135_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 8, 512, 1]> var_4135 = reduce_sum(axes = var_4135_axes_0, keep_dims = var_4135_keep_dims_0, x = var_4125)[name = string("op_4135")];
	tensor<fp16, [1, 8, 512, 512]> var_4141_cast_fp16 = real_div(x = var_4125, y = var_4135)[name = string("op_4141_cast_fp16")];
	bool attn_output_73_transpose_x_0 = const()[name = string("attn_output_73_transpose_x_0"), val = bool(false)];
	bool attn_output_73_transpose_y_0 = const()[name = string("attn_output_73_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 512]> attn_output_73_cast_fp16 = matmul(transpose_x = attn_output_73_transpose_x_0, transpose_y = attn_output_73_transpose_y_0, x = var_4141_cast_fp16, y = V_expanded_9_cast_fp16)[name = string("attn_output_73_cast_fp16")];
	tensor<int32, [4]> var_4152 = const()[name = string("op_4152"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_4159 = const()[name = string("op_4159"), val = tensor<int32, [3]>([1, 512, 4096])];
	tensor<fp16, [1, 512, 8, 512]> var_4153_cast_fp16 = transpose(perm = var_4152, x = attn_output_73_cast_fp16)[name = string("transpose_9")];
	tensor<fp16, [1, 512, 4096]> attn_output_75_cast_fp16 = reshape(shape = var_4159, x = var_4153_cast_fp16)[name = string("attn_output_75_cast_fp16")];
	tensor<int32, [3]> var_4164 = const()[name = string("op_4164"), val = tensor<int32, [3]>([0, 2, 1])];
	string var_4180_pad_type_0 = const()[name = string("op_4180_pad_type_0"), val = string("valid")];
	int32 var_4180_groups_0 = const()[name = string("op_4180_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_4180_strides_0 = const()[name = string("op_4180_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_4180_pad_0 = const()[name = string("op_4180_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_4180_dilations_0 = const()[name = string("op_4180_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1536, 4096, 1]> squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 4096, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321927808))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325073600))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")];
	tensor<fp16, [1, 4096, 512]> var_4165_cast_fp16 = transpose(perm = var_4164, x = attn_output_75_cast_fp16)[name = string("transpose_8")];
	tensor<fp16, [1, 1536, 512]> var_4180_cast_fp16 = conv(dilations = var_4180_dilations_0, groups = var_4180_groups_0, pad = var_4180_pad_0, pad_type = var_4180_pad_type_0, strides = var_4180_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_4165_cast_fp16)[name = string("op_4180_cast_fp16")];
	tensor<int32, [3]> var_4184 = const()[name = string("op_4184"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_4190 = const()[name = string("op_4190"), val = int32(-1)];
	fp16 const_75_promoted_to_fp16 = const()[name = string("const_75_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_151_cast_fp16 = transpose(perm = var_4184, x = var_4180_cast_fp16)[name = string("transpose_7")];
	tensor<fp16, [1, 512, 1536]> var_4192_cast_fp16 = mul(x = x_151_cast_fp16, y = const_75_promoted_to_fp16)[name = string("op_4192_cast_fp16")];
	bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_225_cast_fp16 = concat(axis = var_4190, interleave = input_225_interleave_0, values = (x_151_cast_fp16, var_4192_cast_fp16))[name = string("input_225_cast_fp16")];
	tensor<int32, [1]> normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4187_to_fp16 = const()[name = string("op_4187_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_4187_to_fp16, x = input_225_cast_fp16)[name = string("normed_225_cast_fp16")];
	tensor<int32, [2]> var_4197_split_sizes_0 = const()[name = string("op_4197_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4197_axis_0 = const()[name = string("op_4197_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_4197_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_4197_cast_fp16_1 = split(axis = var_4197_axis_0, split_sizes = var_4197_split_sizes_0, x = normed_225_cast_fp16)[name = string("op_4197_cast_fp16")];
	tensor<fp16, [1536]> layers_9_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325075200)))];
	tensor<fp16, [1, 512, 1536]> attn_output_cast_fp16 = mul(x = var_4197_cast_fp16_0, y = layers_9_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> x_153_cast_fp16 = add(x = x_143_cast_fp16, y = attn_output_cast_fp16)[name = string("x_153_cast_fp16")];
	int32 var_4206 = const()[name = string("op_4206"), val = int32(-1)];
	fp16 const_76_promoted_to_fp16 = const()[name = string("const_76_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_4208_cast_fp16 = mul(x = x_153_cast_fp16, y = const_76_promoted_to_fp16)[name = string("op_4208_cast_fp16")];
	bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_227_cast_fp16 = concat(axis = var_4206, interleave = input_227_interleave_0, values = (x_153_cast_fp16, var_4208_cast_fp16))[name = string("input_227_cast_fp16")];
	tensor<int32, [1]> normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4203_to_fp16 = const()[name = string("op_4203_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_4203_to_fp16, x = input_227_cast_fp16)[name = string("normed_229_cast_fp16")];
	tensor<int32, [2]> var_4213_split_sizes_0 = const()[name = string("op_4213_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4213_axis_0 = const()[name = string("op_4213_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_4213_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_4213_cast_fp16_1 = split(axis = var_4213_axis_0, split_sizes = var_4213_split_sizes_0, x = normed_229_cast_fp16)[name = string("op_4213_cast_fp16")];
	tensor<fp16, [1536]> layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325078336)))];
	tensor<fp16, [1, 512, 1536]> h_57_cast_fp16 = mul(x = var_4213_cast_fp16_0, y = layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_57_cast_fp16")];
	tensor<int32, [3]> var_4224 = const()[name = string("op_4224"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_229_axes_0 = const()[name = string("input_229_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_4225 = transpose(perm = var_4224, x = h_57_cast_fp16)[name = string("transpose_6")];
	tensor<fp16, [1, 1536, 1, 512]> input_229 = expand_dims(axes = input_229_axes_0, x = var_4225)[name = string("input_229")];
	string gate_37_pad_type_0 = const()[name = string("gate_37_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gate_37_strides_0 = const()[name = string("gate_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gate_37_pad_0 = const()[name = string("gate_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gate_37_dilations_0 = const()[name = string("gate_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gate_37_groups_0 = const()[name = string("gate_37_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> gate_37 = conv(dilations = gate_37_dilations_0, groups = gate_37_groups_0, pad = gate_37_pad_0, pad_type = gate_37_pad_type_0, strides = gate_37_strides_0, weight = layers_9_mlp_gate_proj_weight_palettized, x = input_229)[name = string("gate_37")];
	string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> up_strides_0 = const()[name = string("up_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> up_pad_0 = const()[name = string("up_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> up_dilations_0 = const()[name = string("up_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 512]> up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = layers_9_mlp_up_proj_weight_palettized, x = input_229)[name = string("up")];
	string gate_mode_0 = const()[name = string("gate_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 512]> gate = gelu(mode = gate_mode_0, x = gate_37)[name = string("gate")];
	tensor<fp16, [1, 12288, 1, 512]> input_231 = mul(x = gate, y = up)[name = string("input_231")];
	string mlp_out_pad_type_0 = const()[name = string("mlp_out_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> mlp_out_strides_0 = const()[name = string("mlp_out_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> mlp_out_pad_0 = const()[name = string("mlp_out_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> mlp_out_dilations_0 = const()[name = string("mlp_out_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 mlp_out_groups_0 = const()[name = string("mlp_out_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 512]> mlp_out = conv(dilations = mlp_out_dilations_0, groups = mlp_out_groups_0, pad = mlp_out_pad_0, pad_type = mlp_out_pad_type_0, strides = mlp_out_strides_0, weight = layers_9_mlp_down_proj_weight_palettized, x = input_231)[name = string("mlp_out")];
	tensor<int32, [1]> var_4265_axes_0 = const()[name = string("op_4265_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_4265 = squeeze(axes = var_4265_axes_0, x = mlp_out)[name = string("op_4265")];
	tensor<int32, [3]> var_4269 = const()[name = string("op_4269"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_4275 = const()[name = string("op_4275"), val = int32(-1)];
	fp16 const_77_promoted = const()[name = string("const_77_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_155 = transpose(perm = var_4269, x = var_4265)[name = string("transpose_5")];
	tensor<fp16, [1, 512, 1536]> var_4277 = mul(x = x_155, y = const_77_promoted)[name = string("op_4277")];
	bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_233 = concat(axis = var_4275, interleave = input_233_interleave_0, values = (x_155, var_4277))[name = string("input_233")];
	tensor<int32, [1]> normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4272_to_fp16 = const()[name = string("op_4272_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_4272_to_fp16, x = input_233)[name = string("normed_233_cast_fp16")];
	tensor<int32, [2]> var_4282_split_sizes_0 = const()[name = string("op_4282_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4282_axis_0 = const()[name = string("op_4282_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_4282_0, tensor<fp16, [1, 512, 1536]> var_4282_1 = split(axis = var_4282_axis_0, split_sizes = var_4282_split_sizes_0, x = normed_233_cast_fp16)[name = string("op_4282")];
	tensor<fp16, [1, 512, 1536]> hidden_states_mlp = mul(x = var_4282_0, y = layers_9_post_feedforward_layernorm_weight)[name = string("hidden_states_mlp")];
	tensor<fp16, [1, 512, 1536]> hidden_states_75_cast_fp16 = add(x = x_153_cast_fp16, y = hidden_states_mlp)[name = string("hidden_states_75_cast_fp16")];
	tensor<int32, [3]> per_layer_slice_begin_0 = const()[name = string("per_layer_slice_begin_0"), val = tensor<int32, [3]>([0, 0, 8704])];
	tensor<int32, [3]> per_layer_slice_end_0 = const()[name = string("per_layer_slice_end_0"), val = tensor<int32, [3]>([1, 512, 1])];
	tensor<bool, [3]> per_layer_slice_end_mask_0 = const()[name = string("per_layer_slice_end_mask_0"), val = tensor<bool, [3]>([true, true, true])];
	tensor<fp16, [1, 512, 256]> per_layer_slice_cast_fp16 = slice_by_index(begin = per_layer_slice_begin_0, end = per_layer_slice_end_0, end_mask = per_layer_slice_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_cast_fp16")];
	tensor<int32, [3]> var_4310 = const()[name = string("op_4310"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_235_axes_0 = const()[name = string("input_235_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_4311 = transpose(perm = var_4310, x = hidden_states_75_cast_fp16)[name = string("transpose_4")];
	tensor<fp16, [1, 1536, 1, 512]> input_235 = expand_dims(axes = input_235_axes_0, x = var_4311)[name = string("input_235")];
	string gated_55_pad_type_0 = const()[name = string("gated_55_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_55_strides_0 = const()[name = string("gated_55_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_55_pad_0 = const()[name = string("gated_55_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_55_dilations_0 = const()[name = string("gated_55_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_55_groups_0 = const()[name = string("gated_55_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 512]> gated_55 = conv(dilations = gated_55_dilations_0, groups = gated_55_groups_0, pad = gated_55_pad_0, pad_type = gated_55_pad_type_0, strides = gated_55_strides_0, weight = layers_9_per_layer_input_gate_weight_palettized, x = input_235)[name = string("gated_55")];
	string gated_57_mode_0 = const()[name = string("gated_57_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 256, 1, 512]> gated_57 = gelu(mode = gated_57_mode_0, x = gated_55)[name = string("gated_57")];
	tensor<int32, [3]> var_4330 = const()[name = string("op_4330"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> per_layer_slice_conv_axes_0 = const()[name = string("per_layer_slice_conv_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 256, 512]> var_4331_cast_fp16 = transpose(perm = var_4330, x = per_layer_slice_cast_fp16)[name = string("transpose_3")];
	tensor<fp16, [1, 256, 1, 512]> per_layer_slice_conv_cast_fp16 = expand_dims(axes = per_layer_slice_conv_axes_0, x = var_4331_cast_fp16)[name = string("per_layer_slice_conv_cast_fp16")];
	tensor<fp16, [1, 256, 1, 512]> input_237_cast_fp16 = mul(x = gated_57, y = per_layer_slice_conv_cast_fp16)[name = string("input_237_cast_fp16")];
	string gated_pad_type_0 = const()[name = string("gated_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> gated_strides_0 = const()[name = string("gated_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> gated_pad_0 = const()[name = string("gated_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> gated_dilations_0 = const()[name = string("gated_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 gated_groups_0 = const()[name = string("gated_groups_0"), val = int32(1)];
	tensor<fp16, [1536, 256, 1, 1]> layers_9_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325081472))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325278144))))[name = string("layers_9_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1536, 1, 512]> gated_cast_fp16 = conv(dilations = gated_dilations_0, groups = gated_groups_0, pad = gated_pad_0, pad_type = gated_pad_type_0, strides = gated_strides_0, weight = layers_9_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_237_cast_fp16)[name = string("gated_cast_fp16")];
	tensor<int32, [1]> var_4347_axes_0 = const()[name = string("op_4347_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 512]> var_4347_cast_fp16 = squeeze(axes = var_4347_axes_0, x = gated_cast_fp16)[name = string("op_4347_cast_fp16")];
	tensor<int32, [3]> var_4351 = const()[name = string("op_4351"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_4357 = const()[name = string("op_4357"), val = int32(-1)];
	fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> x_157_cast_fp16 = transpose(perm = var_4351, x = var_4347_cast_fp16)[name = string("transpose_2")];
	tensor<fp16, [1, 512, 1536]> var_4359_cast_fp16 = mul(x = x_157_cast_fp16, y = const_78_promoted_to_fp16)[name = string("op_4359_cast_fp16")];
	bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_239_cast_fp16 = concat(axis = var_4357, interleave = input_239_interleave_0, values = (x_157_cast_fp16, var_4359_cast_fp16))[name = string("input_239_cast_fp16")];
	tensor<int32, [1]> normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4354_to_fp16 = const()[name = string("op_4354_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_4354_to_fp16, x = input_239_cast_fp16)[name = string("normed_237_cast_fp16")];
	tensor<int32, [2]> var_4364_split_sizes_0 = const()[name = string("op_4364_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4364_axis_0 = const()[name = string("op_4364_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_4364_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_4364_cast_fp16_1 = split(axis = var_4364_axis_0, split_sizes = var_4364_split_sizes_0, x = normed_237_cast_fp16)[name = string("op_4364_cast_fp16")];
	tensor<fp16, [1536]> layers_9_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_9_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325279744)))];
	tensor<fp16, [1, 512, 1536]> hidden_states_79_cast_fp16 = mul(x = var_4364_cast_fp16_0, y = layers_9_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_79_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")];
	tensor<fp16, [1]> const_79_promoted_to_fp16 = const()[name = string("const_79_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.56p-3])];
	tensor<fp16, [1, 512, 1536]> x_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_79_promoted_to_fp16)[name = string("x_cast_fp16")];
	int32 var_4379 = const()[name = string("op_4379"), val = int32(-1)];
	fp16 const_80_promoted_to_fp16 = const()[name = string("const_80_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 512, 1536]> var_4381_cast_fp16 = mul(x = x_cast_fp16, y = const_80_promoted_to_fp16)[name = string("op_4381_cast_fp16")];
	bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 512, 3072]> input_241_cast_fp16 = concat(axis = var_4379, interleave = input_241_interleave_0, values = (x_cast_fp16, var_4381_cast_fp16))[name = string("input_241_cast_fp16")];
	tensor<int32, [1]> normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4376_to_fp16 = const()[name = string("op_4376_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 512, 3072]> normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_4376_to_fp16, x = input_241_cast_fp16)[name = string("normed_241_cast_fp16")];
	tensor<int32, [2]> var_4386_split_sizes_0 = const()[name = string("op_4386_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4386_axis_0 = const()[name = string("op_4386_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 512, 1536]> var_4386_cast_fp16_0, tensor<fp16, [1, 512, 1536]> var_4386_cast_fp16_1 = split(axis = var_4386_axis_0, split_sizes = var_4386_split_sizes_0, x = normed_241_cast_fp16)[name = string("op_4386_cast_fp16")];
	tensor<fp16, [1536]> norm_weight_promoted_to_fp16 = const()[name = string("norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325282880)))];
	tensor<fp16, [1, 512, 1536]> hidden_states_cast_fp16 = mul(x = var_4386_cast_fp16_0, y = norm_weight_promoted_to_fp16)[name = string("hidden_states_cast_fp16")];
	tensor<fp16, [1, 512, 1536]> var_4389_cast_fp16 = mul(x = hidden_states_cast_fp16, y = last_position_mask)[name = string("op_4389_cast_fp16")];
	tensor<int32, [1]> last_1_axes_0 = const()[name = string("last_1_axes_0"), val = tensor<int32, [1]>([1])];
	bool last_1_keep_dims_0 = const()[name = string("last_1_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 1, 1536]> last_1_cast_fp16 = reduce_sum(axes = last_1_axes_0, keep_dims = last_1_keep_dims_0, x = var_4389_cast_fp16)[name = string("last_1_cast_fp16")];
	tensor<int32, [3]> var_4403 = const()[name = string("op_4403"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [262144, 1536, 1]> squeeze_10_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [262144, 1536, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325286016))), lut = tensor<fp16, [8192, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526612672))))[name = string("squeeze_10_palettized")];
	string var_4419_pad_type_0 = const()[name = string("op_4419_pad_type_0"), val = string("valid")];
	int32 var_4419_groups_0 = const()[name = string("op_4419_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_4419_strides_0 = const()[name = string("op_4419_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_4419_pad_0 = const()[name = string("op_4419_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_4419_dilations_0 = const()[name = string("op_4419_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 1536, 1]> var_4404 = transpose(perm = var_4403, x = last_1_cast_fp16)[name = string("transpose_1")];
	tensor<fp16, [1, 262144, 1]> var_4419 = conv(dilations = var_4419_dilations_0, groups = var_4419_groups_0, pad = var_4419_pad_0, pad_type = var_4419_pad_type_0, strides = var_4419_strides_0, weight = squeeze_10_palettized, x = var_4404)[name = string("op_4419")];
	tensor<int32, [3]> var_4423 = const()[name = string("op_4423"), val = tensor<int32, [3]>([0, 2, 1])];
	fp16 _inversed_4426_y_0_to_fp16 = const()[name = string("_inversed_4426_y_0_to_fp16"), val = fp16(0x1.11p-5)];
	tensor<fp16, [1, 1, 262144]> logits_1 = transpose(perm = var_4423, x = var_4419)[name = string("transpose_0")];
	tensor<fp16, [1, 1, 262144]> _inversed_4426_cast_fp16 = mul(x = logits_1, y = _inversed_4426_y_0_to_fp16)[name = string("_inversed_4426_cast_fp16")];
	tensor<fp16, [1, 1, 262144]> var_4427_cast_fp16 = tanh(x = _inversed_4426_cast_fp16)[name = string("op_4427_cast_fp16")];
	fp16 var_4428_to_fp16 = const()[name = string("op_4428_to_fp16"), val = fp16(0x1.ep+4)];
	tensor<fp16, [1, 1, 262144]> logits_3_cast_fp16 = mul(x = var_4427_cast_fp16, y = var_4428_to_fp16)[name = string("logits_3_cast_fp16")];
	tensor<int32, [1]> logits_axes_0 = const()[name = string("logits_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 262144]> logits_cast_fp16 = squeeze(axes = logits_axes_0, x = logits_3_cast_fp16)[name = string("logits_cast_fp16")];
	int32 var_4433 = const()[name = string("op_4433"), val = int32(-1)];
	int32 token_id_axis_0 = const()[name = string("token_id_axis_0"), val = int32(-1)];
	bool token_id_keep_dims_0 = const()[name = string("token_id_keep_dims_0"), val = bool(false)];
	string token_id_output_dtype_0 = const()[name = string("token_id_output_dtype_0"), val = string("int32")];
	tensor<int32, [1]> token_id = reduce_argmax(axis = token_id_axis_0, keep_dims = token_id_keep_dims_0, output_dtype = token_id_output_dtype_0, x = logits_cast_fp16)[name = string("token_id_cast_fp16")];
	tensor<int32, [1]> var_4435_axes_0 = const()[name = string("op_4435_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<int32, [1, 1]> var_4435 = expand_dims(axes = var_4435_axes_0, x = token_id)[name = string("op_4435")];
	bool var_4436_validate_indices_0 = const()[name = string("op_4436_validate_indices_0"), val = bool(false)];
	tensor<fp16, [1, 1]> var_4436_cast_fp16 = gather_along_axis(axis = var_4433, indices = var_4435, validate_indices = var_4436_validate_indices_0, x = logits_cast_fp16)[name = string("op_4436_cast_fp16")];
	tensor<int32, [1]> var_4437_axes_0 = const()[name = string("op_4437_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [1]> token_logit = squeeze(axes = var_4437_axes_0, x = var_4436_cast_fp16)[name = string("op_4437_cast_fp16")];
	} -> (token_id, token_logit);
	}