Duplicate from mlboydaisuke/gemma-4-E2B-coreml

71c57fe 20 days ago

396 kB

	program(1.3)
	[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})]
	{
	func main<ios18>(tensor<fp16, [1, 1, 1, 512]> causal_mask, tensor<fp16, [1, 1, 1, 512]> cos_f, tensor<fp16, [1, 1, 1, 256]> cos_s, tensor<fp16, [1, 1, 1536]> hidden_states, state<tensor<fp16, [24, 1, 512, 512]>> kv_cache_0, tensor<fp16, [1, 1, 8960]> per_layer_combined, tensor<fp16, [1, 1, 1, 512]> sin_f, tensor<fp16, [1, 1, 1, 256]> sin_s, tensor<fp16, [1, 1, 512, 1]> update_mask) {
	tensor<fp16, [2048, 1536, 1, 1]> layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1572992))))[name = string("layers_0_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [256, 1536, 1, 1]> layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1575104))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1771776))))[name = string("layers_0_self_attn_k_proj_weight_palettized")];
	tensor<fp16, [256, 1536, 1, 1]> layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1772096))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1968768))))[name = string("layers_0_self_attn_v_proj_weight_palettized")];
	tensor<fp16, [6144, 1536, 1, 1]> layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [6144, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1969088))), lut = tensor<fp16, [192, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6687744))))[name = string("layers_0_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [6144, 1536, 1, 1]> layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [6144, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6693952))), lut = tensor<fp16, [192, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11412608))))[name = string("layers_0_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 6144, 1, 1]> layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 6144, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11418816))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16137472))))[name = string("layers_0_mlp_down_proj_weight_palettized")];
	tensor<fp16, [256, 1536]> layers_0_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16139072))), lut = tensor<fp16, [8, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16335744))))[name = string("layers_0_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16336064))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17908992))))[name = string("layers_1_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [256, 1536, 1, 1]> layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17911104))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18107776))))[name = string("layers_1_self_attn_k_proj_weight_palettized")];
	tensor<fp16, [256, 1536, 1, 1]> layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18108096))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18304768))))[name = string("layers_1_self_attn_v_proj_weight_palettized")];
	tensor<fp16, [6144, 1536, 1, 1]> layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [6144, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18305088))), lut = tensor<fp16, [192, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23023744))))[name = string("layers_1_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [6144, 1536, 1, 1]> layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [6144, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23029952))), lut = tensor<fp16, [192, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27748608))))[name = string("layers_1_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 6144, 1, 1]> layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 6144, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27754816))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32473472))))[name = string("layers_1_mlp_down_proj_weight_palettized")];
	tensor<fp16, [256, 1536]> layers_1_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32475072))), lut = tensor<fp16, [8, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32671744))))[name = string("layers_1_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [4096, 1536, 1, 1]> layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [4096, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32672064))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35817856))))[name = string("layers_2_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [512, 1536, 1, 1]> layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [512, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35822016))), lut = tensor<fp16, [16, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36215296))))[name = string("layers_2_self_attn_k_proj_weight_palettized")];
	tensor<fp16, [512, 1536, 1, 1]> layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [512, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36215872))), lut = tensor<fp16, [16, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36609152))))[name = string("layers_2_self_attn_v_proj_weight_palettized")];
	tensor<fp16, [6144, 1536, 1, 1]> layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [6144, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36609728))), lut = tensor<fp16, [192, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41328384))))[name = string("layers_2_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [6144, 1536, 1, 1]> layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [6144, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41334592))), lut = tensor<fp16, [192, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46053248))))[name = string("layers_2_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 6144, 1, 1]> layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 6144, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46059456))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50778112))))[name = string("layers_2_mlp_down_proj_weight_palettized")];
	tensor<fp16, [256, 1536]> layers_2_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50779712))), lut = tensor<fp16, [8, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50976384))))[name = string("layers_2_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50976704))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52549632))))[name = string("layers_3_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52551744))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61988992))))[name = string("layers_3_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62001344))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71438592))))[name = string("layers_3_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71450944))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80888192))))[name = string("layers_3_mlp_down_proj_weight_palettized")];
	tensor<fp16, [256, 1536]> layers_3_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80889792))), lut = tensor<fp16, [8, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81086464))))[name = string("layers_3_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81086784))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82659712))))[name = string("layers_4_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82661824))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92099072))))[name = string("layers_4_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92111424))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101548672))))[name = string("layers_4_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101561024))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110998272))))[name = string("layers_4_mlp_down_proj_weight_palettized")];
	tensor<fp16, [256, 1536]> layers_4_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110999872))), lut = tensor<fp16, [8, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111196544))))[name = string("layers_4_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111196864))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112769792))))[name = string("layers_5_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112771904))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122209152))))[name = string("layers_5_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122221504))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131658752))))[name = string("layers_5_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131671104))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141108352))))[name = string("layers_5_mlp_down_proj_weight_palettized")];
	tensor<fp16, [256, 1536]> layers_5_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141109952))), lut = tensor<fp16, [8, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141306624))))[name = string("layers_5_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141306944))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142879872))))[name = string("layers_6_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142881984))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152319232))))[name = string("layers_6_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152331584))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161768832))))[name = string("layers_6_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161781184))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171218432))))[name = string("layers_6_mlp_down_proj_weight_palettized")];
	tensor<fp16, [256, 1536]> layers_6_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171220032))), lut = tensor<fp16, [8, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171416704))))[name = string("layers_6_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [4096, 1536, 1, 1]> layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [4096, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171417024))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174562816))))[name = string("layers_7_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174566976))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184004224))))[name = string("layers_7_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184016576))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193453824))))[name = string("layers_7_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193466176))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202903424))))[name = string("layers_7_mlp_down_proj_weight_palettized")];
	tensor<fp16, [256, 1536]> layers_7_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202905024))), lut = tensor<fp16, [8, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203101696))))[name = string("layers_7_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203102016))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204674944))))[name = string("layers_8_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204677056))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214114304))))[name = string("layers_8_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214126656))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223563904))))[name = string("layers_8_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223576256))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233013504))))[name = string("layers_8_mlp_down_proj_weight_palettized")];
	tensor<fp16, [256, 1536]> layers_8_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233015104))), lut = tensor<fp16, [8, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233211776))))[name = string("layers_8_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233212096))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234785024))))[name = string("layers_9_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234787136))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244224384))))[name = string("layers_9_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244236736))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253673984))))[name = string("layers_9_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253686336))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263123584))))[name = string("layers_9_mlp_down_proj_weight_palettized")];
	tensor<fp16, [256, 1536]> layers_9_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263125184))), lut = tensor<fp16, [8, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263321856))))[name = string("layers_9_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263322176))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264895104))))[name = string("layers_10_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264897216))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274334464))))[name = string("layers_10_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274346816))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283784064))))[name = string("layers_10_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283796416))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293233664))))[name = string("layers_10_mlp_down_proj_weight_palettized")];
	tensor<fp16, [256, 1536]> layers_10_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293235264))), lut = tensor<fp16, [8, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293431936))))[name = string("layers_10_per_layer_input_gate_weight_palettized")];
	tensor<fp16, [2048, 1536, 1, 1]> layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293432256))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295005184))))[name = string("layers_11_self_attn_q_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295007296))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304444544))))[name = string("layers_11_mlp_gate_proj_weight_palettized")];
	tensor<fp16, [12288, 1536, 1, 1]> layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304456896))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313894144))))[name = string("layers_11_mlp_up_proj_weight_palettized")];
	tensor<fp16, [1536, 12288, 1, 1]> layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313906496))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323343744))))[name = string("layers_11_mlp_down_proj_weight_palettized")];
	tensor<fp16, [256, 1536]> layers_11_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323345344))), lut = tensor<fp16, [8, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323542016))))[name = string("layers_11_per_layer_input_gate_weight_palettized")];
	int32 var_626 = const()[name = string("op_626"), val = int32(-1)];
	fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_632_cast_fp16 = mul(x = hidden_states, y = const_0_promoted_to_fp16)[name = string("op_632_cast_fp16")];
	bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_1_cast_fp16 = concat(axis = var_626, interleave = input_1_interleave_0, values = (hidden_states, var_632_cast_fp16))[name = string("input_1_cast_fp16")];
	tensor<int32, [1]> normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_624_to_fp16 = const()[name = string("op_624_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_624_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")];
	tensor<int32, [2]> var_637_split_sizes_0 = const()[name = string("op_637_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_637_axis_0 = const()[name = string("op_637_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_637_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_637_cast_fp16_1 = split(axis = var_637_axis_0, split_sizes = var_637_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_637_cast_fp16")];
	tensor<fp16, [1536]> const_1_to_fp16 = const()[name = string("const_1_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323542336)))];
	tensor<fp16, [1, 1, 1536]> var_640_cast_fp16 = mul(x = var_637_cast_fp16_0, y = const_1_to_fp16)[name = string("op_640_cast_fp16")];
	tensor<int32, [3]> var_645 = const()[name = string("op_645"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_648_axes_0 = const()[name = string("op_648_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_646 = transpose(perm = var_645, x = var_640_cast_fp16)[name = string("transpose_107")];
	tensor<fp16, [1, 1536, 1, 1]> var_648 = expand_dims(axes = var_648_axes_0, x = var_646)[name = string("op_648")];
	string var_664_pad_type_0 = const()[name = string("op_664_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_664_strides_0 = const()[name = string("op_664_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_664_pad_0 = const()[name = string("op_664_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_664_dilations_0 = const()[name = string("op_664_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_664_groups_0 = const()[name = string("op_664_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_664 = conv(dilations = var_664_dilations_0, groups = var_664_groups_0, pad = var_664_pad_0, pad_type = var_664_pad_type_0, strides = var_664_strides_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = var_648)[name = string("op_664")];
	tensor<int32, [4]> var_669 = const()[name = string("op_669"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_670 = reshape(shape = var_669, x = var_664)[name = string("op_670")];
	tensor<int32, [4]> var_675 = const()[name = string("op_675"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_685 = const()[name = string("op_685"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_676 = transpose(perm = var_675, x = var_670)[name = string("transpose_106")];
	tensor<fp16, [1, 8, 256]> x_3 = reshape(shape = var_685, x = var_676)[name = string("x_3")];
	int32 var_691 = const()[name = string("op_691"), val = int32(-1)];
	fp16 const_2_promoted_to_fp16 = const()[name = string("const_2_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_697_cast_fp16 = mul(x = x_3, y = const_2_promoted_to_fp16)[name = string("op_697_cast_fp16")];
	bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_5_cast_fp16 = concat(axis = var_691, interleave = input_5_interleave_0, values = (x_3, var_697_cast_fp16))[name = string("input_5_cast_fp16")];
	tensor<int32, [1]> normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_689_to_fp16 = const()[name = string("op_689_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_689_to_fp16, x = input_5_cast_fp16)[name = string("normed_5_cast_fp16")];
	tensor<int32, [2]> var_702_split_sizes_0 = const()[name = string("op_702_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_702_axis_0 = const()[name = string("op_702_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_702_cast_fp16_0, tensor<fp16, [1, 8, 256]> var_702_cast_fp16_1 = split(axis = var_702_axis_0, split_sizes = var_702_split_sizes_0, x = normed_5_cast_fp16)[name = string("op_702_cast_fp16")];
	tensor<fp16, [256]> const_3_to_fp16 = const()[name = string("const_3_to_fp16"), val = tensor<fp16, [256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323545472)))];
	tensor<fp16, [1, 8, 256]> var_705_cast_fp16 = mul(x = var_702_cast_fp16_0, y = const_3_to_fp16)[name = string("op_705_cast_fp16")];
	tensor<int32, [4]> var_711 = const()[name = string("op_711"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_3 = reshape(shape = var_711, x = var_705_cast_fp16)[name = string("q_3")];
	tensor<fp16, [1, 8, 1, 256]> var_713_cast_fp16 = mul(x = q_3, y = cos_s)[name = string("op_713_cast_fp16")];
	tensor<int32, [2]> var_714_split_sizes_0 = const()[name = string("op_714_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_714_axis_0 = const()[name = string("op_714_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_714_0, tensor<fp16, [1, 8, 1, 128]> var_714_1 = split(axis = var_714_axis_0, split_sizes = var_714_split_sizes_0, x = q_3)[name = string("op_714")];
	fp16 const_4_promoted = const()[name = string("const_4_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_716 = mul(x = var_714_1, y = const_4_promoted)[name = string("op_716")];
	int32 var_718 = const()[name = string("op_718"), val = int32(-1)];
	bool var_719_interleave_0 = const()[name = string("op_719_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_719 = concat(axis = var_718, interleave = var_719_interleave_0, values = (var_716, var_714_0))[name = string("op_719")];
	tensor<fp16, [1, 8, 1, 256]> var_720_cast_fp16 = mul(x = var_719, y = sin_s)[name = string("op_720_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_7_cast_fp16 = add(x = var_713_cast_fp16, y = var_720_cast_fp16)[name = string("q_7_cast_fp16")];
	string var_733_pad_type_0 = const()[name = string("op_733_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_733_strides_0 = const()[name = string("op_733_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_733_pad_0 = const()[name = string("op_733_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_733_dilations_0 = const()[name = string("op_733_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_733_groups_0 = const()[name = string("op_733_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 1]> var_733 = conv(dilations = var_733_dilations_0, groups = var_733_groups_0, pad = var_733_pad_0, pad_type = var_733_pad_type_0, strides = var_733_strides_0, weight = layers_0_self_attn_k_proj_weight_palettized, x = var_648)[name = string("op_733")];
	tensor<int32, [4]> var_738 = const()[name = string("op_738"), val = tensor<int32, [4]>([1, 1, 256, 1])];
	tensor<fp16, [1, 1, 256, 1]> var_739 = reshape(shape = var_738, x = var_733)[name = string("op_739")];
	tensor<int32, [4]> var_744 = const()[name = string("op_744"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	string var_761_pad_type_0 = const()[name = string("op_761_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_761_strides_0 = const()[name = string("op_761_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_761_pad_0 = const()[name = string("op_761_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_761_dilations_0 = const()[name = string("op_761_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_761_groups_0 = const()[name = string("op_761_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 1]> var_761 = conv(dilations = var_761_dilations_0, groups = var_761_groups_0, pad = var_761_pad_0, pad_type = var_761_pad_type_0, strides = var_761_strides_0, weight = layers_0_self_attn_v_proj_weight_palettized, x = var_648)[name = string("op_761")];
	tensor<int32, [4]> var_766 = const()[name = string("op_766"), val = tensor<int32, [4]>([1, 1, 256, 1])];
	tensor<fp16, [1, 1, 256, 1]> var_767 = reshape(shape = var_766, x = var_761)[name = string("op_767")];
	tensor<int32, [4]> var_772 = const()[name = string("op_772"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_782 = const()[name = string("op_782"), val = tensor<int32, [3]>([1, 1, 256])];
	tensor<fp16, [1, 1, 1, 256]> var_745 = transpose(perm = var_744, x = var_739)[name = string("transpose_105")];
	tensor<fp16, [1, 1, 256]> x_7 = reshape(shape = var_782, x = var_745)[name = string("x_7")];
	int32 var_788 = const()[name = string("op_788"), val = int32(-1)];
	fp16 const_5_promoted_to_fp16 = const()[name = string("const_5_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 256]> var_794_cast_fp16 = mul(x = x_7, y = const_5_promoted_to_fp16)[name = string("op_794_cast_fp16")];
	bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 512]> input_7_cast_fp16 = concat(axis = var_788, interleave = input_7_interleave_0, values = (x_7, var_794_cast_fp16))[name = string("input_7_cast_fp16")];
	tensor<int32, [1]> normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_786_to_fp16 = const()[name = string("op_786_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 512]> normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_786_to_fp16, x = input_7_cast_fp16)[name = string("normed_9_cast_fp16")];
	tensor<int32, [2]> var_799_split_sizes_0 = const()[name = string("op_799_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_799_axis_0 = const()[name = string("op_799_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 256]> var_799_cast_fp16_0, tensor<fp16, [1, 1, 256]> var_799_cast_fp16_1 = split(axis = var_799_axis_0, split_sizes = var_799_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_799_cast_fp16")];
	tensor<fp16, [256]> const_6_to_fp16 = const()[name = string("const_6_to_fp16"), val = tensor<fp16, [256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323546048)))];
	tensor<fp16, [1, 1, 256]> var_802_cast_fp16 = mul(x = var_799_cast_fp16_0, y = const_6_to_fp16)[name = string("op_802_cast_fp16")];
	tensor<int32, [4]> var_808 = const()[name = string("op_808"), val = tensor<int32, [4]>([1, 1, 1, 256])];
	tensor<fp16, [1, 1, 1, 256]> q_5 = reshape(shape = var_808, x = var_802_cast_fp16)[name = string("q_5")];
	fp16 var_815_promoted_to_fp16 = const()[name = string("op_815_promoted_to_fp16"), val = fp16(0x1p+1)];
	tensor<fp16, [1, 1, 1, 256]> var_773 = transpose(perm = var_772, x = var_767)[name = string("transpose_104")];
	tensor<fp16, [1, 1, 1, 256]> var_816_cast_fp16 = pow(x = var_773, y = var_815_promoted_to_fp16)[name = string("op_816_cast_fp16")];
	tensor<int32, [1]> var_821_axes_0 = const()[name = string("op_821_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_821_keep_dims_0 = const()[name = string("op_821_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 1, 1, 1]> var_821_cast_fp16 = reduce_mean(axes = var_821_axes_0, keep_dims = var_821_keep_dims_0, x = var_816_cast_fp16)[name = string("op_821_cast_fp16")];
	fp16 var_823_to_fp16 = const()[name = string("op_823_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 1, 1]> mean_sq_1_cast_fp16 = add(x = var_821_cast_fp16, y = var_823_to_fp16)[name = string("mean_sq_1_cast_fp16")];
	fp16 var_830_to_fp16 = const()[name = string("op_830_to_fp16"), val = fp16(-0x1p-1)];
	tensor<fp16, [1, 1, 1, 1]> var_831_cast_fp16 = pow(x = mean_sq_1_cast_fp16, y = var_830_to_fp16)[name = string("op_831_cast_fp16")];
	tensor<fp16, [1, 1, 1, 256]> var_832_cast_fp16 = mul(x = var_773, y = var_831_cast_fp16)[name = string("op_832_cast_fp16")];
	tensor<fp16, [1, 1, 1, 256]> var_838_cast_fp16 = mul(x = q_5, y = cos_s)[name = string("op_838_cast_fp16")];
	tensor<int32, [2]> var_839_split_sizes_0 = const()[name = string("op_839_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_839_axis_0 = const()[name = string("op_839_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1, 128]> var_839_0, tensor<fp16, [1, 1, 1, 128]> var_839_1 = split(axis = var_839_axis_0, split_sizes = var_839_split_sizes_0, x = q_5)[name = string("op_839")];
	fp16 const_7_promoted = const()[name = string("const_7_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1, 128]> var_841 = mul(x = var_839_1, y = const_7_promoted)[name = string("op_841")];
	int32 var_843 = const()[name = string("op_843"), val = int32(-1)];
	bool var_844_interleave_0 = const()[name = string("op_844_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 1, 256]> var_844 = concat(axis = var_843, interleave = var_844_interleave_0, values = (var_841, var_839_0))[name = string("op_844")];
	tensor<fp16, [1, 1, 1, 256]> var_845_cast_fp16 = mul(x = var_844, y = sin_s)[name = string("op_845_cast_fp16")];
	tensor<fp16, [1, 1, 1, 256]> input_9_cast_fp16 = add(x = var_838_cast_fp16, y = var_845_cast_fp16)[name = string("input_9_cast_fp16")];
	tensor<fp16, [24, 1, 512, 512]> read_state_0 = read_state(input = kv_cache_0)[name = string("read_state_0")];
	tensor<int32, [4]> var_850_begin_0 = const()[name = string("op_850_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> var_850_end_0 = const()[name = string("op_850_end_0"), val = tensor<int32, [4]>([1, 1, 512, 512])];
	tensor<bool, [4]> var_850_end_mask_0 = const()[name = string("op_850_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_850_squeeze_mask_0 = const()[name = string("op_850_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [1, 512, 512]> var_850_cast_fp16 = slice_by_index(begin = var_850_begin_0, end = var_850_end_0, end_mask = var_850_end_mask_0, squeeze_mask = var_850_squeeze_mask_0, x = read_state_0)[name = string("op_850_cast_fp16")];
	tensor<int32, [1]> Kc_1_axes_0 = const()[name = string("Kc_1_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1, 512, 512]> Kc_1_cast_fp16 = expand_dims(axes = Kc_1_axes_0, x = var_850_cast_fp16)[name = string("Kc_1_cast_fp16")];
	tensor<int32, [4]> var_855_begin_0 = const()[name = string("op_855_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
	tensor<int32, [4]> var_855_end_0 = const()[name = string("op_855_end_0"), val = tensor<int32, [4]>([13, 1, 512, 512])];
	tensor<bool, [4]> var_855_end_mask_0 = const()[name = string("op_855_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_855_squeeze_mask_0 = const()[name = string("op_855_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [1, 512, 512]> var_855_cast_fp16 = slice_by_index(begin = var_855_begin_0, end = var_855_end_0, end_mask = var_855_end_mask_0, squeeze_mask = var_855_squeeze_mask_0, x = read_state_0)[name = string("op_855_cast_fp16")];
	tensor<int32, [1]> Vc_1_axes_0 = const()[name = string("Vc_1_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1, 512, 512]> Vc_1_cast_fp16 = expand_dims(axes = Vc_1_axes_0, x = var_855_cast_fp16)[name = string("Vc_1_cast_fp16")];
	tensor<int32, [8]> kp_1_pad_0 = const()[name = string("kp_1_pad_0"), val = tensor<int32, [8]>([0, 0, 0, 0, 0, 0, 0, 256])];
	string kp_1_mode_0 = const()[name = string("kp_1_mode_0"), val = string("constant")];
	fp16 const_8_to_fp16 = const()[name = string("const_8_to_fp16"), val = fp16(0x0p+0)];
	tensor<fp16, [1, 1, 1, 512]> kp_1_cast_fp16 = pad(constant_val = const_8_to_fp16, mode = kp_1_mode_0, pad = kp_1_pad_0, x = input_9_cast_fp16)[name = string("kp_1_cast_fp16")];
	tensor<int32, [8]> vp_1_pad_0 = const()[name = string("vp_1_pad_0"), val = tensor<int32, [8]>([0, 0, 0, 0, 0, 0, 0, 256])];
	string vp_1_mode_0 = const()[name = string("vp_1_mode_0"), val = string("constant")];
	fp16 const_9_to_fp16 = const()[name = string("const_9_to_fp16"), val = fp16(0x0p+0)];
	tensor<fp16, [1, 1, 1, 512]> vp_1_cast_fp16 = pad(constant_val = const_9_to_fp16, mode = vp_1_mode_0, pad = vp_1_pad_0, x = var_832_cast_fp16)[name = string("vp_1_cast_fp16")];
	fp16 var_870_promoted_to_fp16 = const()[name = string("op_870_promoted_to_fp16"), val = fp16(0x1p+0)];
	tensor<fp16, [1, 1, 512, 1]> var_872_cast_fp16 = sub(x = var_870_promoted_to_fp16, y = update_mask)[name = string("op_872_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> var_873_cast_fp16 = mul(x = Kc_1_cast_fp16, y = var_872_cast_fp16)[name = string("op_873_cast_fp16")];
	tensor<int32, [4]> var_874_reps_0 = const()[name = string("op_874_reps_0"), val = tensor<int32, [4]>([1, 1, 512, 1])];
	tensor<fp16, [1, 1, 512, 512]> var_874_cast_fp16 = tile(reps = var_874_reps_0, x = kp_1_cast_fp16)[name = string("op_874_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> var_875_cast_fp16 = mul(x = var_874_cast_fp16, y = update_mask)[name = string("op_875_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> Kn_1_cast_fp16 = add(x = var_873_cast_fp16, y = var_875_cast_fp16)[name = string("Kn_1_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> var_881_cast_fp16 = mul(x = Vc_1_cast_fp16, y = var_872_cast_fp16)[name = string("op_881_cast_fp16")];
	tensor<int32, [4]> var_882_reps_0 = const()[name = string("op_882_reps_0"), val = tensor<int32, [4]>([1, 1, 512, 1])];
	tensor<fp16, [1, 1, 512, 512]> var_882_cast_fp16 = tile(reps = var_882_reps_0, x = vp_1_cast_fp16)[name = string("op_882_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> var_883_cast_fp16 = mul(x = var_882_cast_fp16, y = update_mask)[name = string("op_883_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> Vn_1_cast_fp16 = add(x = var_881_cast_fp16, y = var_883_cast_fp16)[name = string("Vn_1_cast_fp16")];
	tensor<int32, [1]> var_887_axes_0 = const()[name = string("op_887_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 512, 512]> var_887_cast_fp16 = squeeze(axes = var_887_axes_0, x = Kn_1_cast_fp16)[name = string("op_887_cast_fp16")];
	tensor<int32, [4]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [24, 1, 512, 512]> kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_1_stride_0, update = var_887_cast_fp16, x = read_state_0)[name = string("kv_cache_0_internal_tensor_assign_1_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_1_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_6_write_state")];
	tensor<fp16, [24, 1, 512, 512]> coreml_update_state_6 = read_state(input = kv_cache_0)[name = string("coreml_update_state_6")];
	tensor<int32, [1]> var_894_axes_0 = const()[name = string("op_894_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 512, 512]> var_894_cast_fp16 = squeeze(axes = var_894_axes_0, x = Vn_1_cast_fp16)[name = string("op_894_cast_fp16")];
	tensor<int32, [4]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [4]>([12, 0, 0, 0])];
	tensor<int32, [4]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [24, 1, 512, 512]> kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_2, begin_mask = kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_3, end_mask = kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_2_stride_0, update = var_894_cast_fp16, x = coreml_update_state_6)[name = string("kv_cache_0_internal_tensor_assign_2_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_2_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_7_write_state")];
	tensor<fp16, [24, 1, 512, 512]> coreml_update_state_7 = read_state(input = kv_cache_0)[name = string("coreml_update_state_7")];
	tensor<int32, [4]> Ka_1_begin_0 = const()[name = string("Ka_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> Ka_1_end_0 = const()[name = string("Ka_1_end_0"), val = tensor<int32, [4]>([1, 1, 512, 256])];
	tensor<bool, [4]> Ka_1_end_mask_0 = const()[name = string("Ka_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
	tensor<fp16, [1, 1, 512, 256]> Ka_1_cast_fp16 = slice_by_index(begin = Ka_1_begin_0, end = Ka_1_end_0, end_mask = Ka_1_end_mask_0, x = Kn_1_cast_fp16)[name = string("Ka_1_cast_fp16")];
	tensor<int32, [4]> Va_1_begin_0 = const()[name = string("Va_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> Va_1_end_0 = const()[name = string("Va_1_end_0"), val = tensor<int32, [4]>([1, 1, 512, 256])];
	tensor<bool, [4]> Va_1_end_mask_0 = const()[name = string("Va_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
	tensor<fp16, [1, 1, 512, 256]> Va_1_cast_fp16 = slice_by_index(begin = Va_1_begin_0, end = Va_1_end_0, end_mask = Va_1_end_mask_0, x = Vn_1_cast_fp16)[name = string("Va_1_cast_fp16")];
	tensor<int32, [4]> transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_0_reps_0 = const()[name = string("tile_0_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])];
	tensor<fp16, [1, 1, 512, 256]> transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = Ka_1_cast_fp16)[name = string("transpose_103")];
	tensor<fp16, [8, 1, 512, 256]> tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = string("tile_0_cast_fp16")];
	tensor<int32, [5]> concat_4 = const()[name = string("concat_4"), val = tensor<int32, [5]>([8, 1, 1, 512, 256])];
	tensor<fp16, [8, 1, 1, 512, 256]> reshape_0_cast_fp16 = reshape(shape = concat_4, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")];
	tensor<int32, [5]> transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_5 = const()[name = string("concat_5"), val = tensor<int32, [4]>([-1, 1, 512, 256])];
	tensor<fp16, [1, 8, 1, 512, 256]> transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_102")];
	tensor<fp16, [8, 1, 512, 256]> reshape_1_cast_fp16 = reshape(shape = concat_5, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")];
	tensor<int32, [4]> transpose_48_perm_0 = const()[name = string("transpose_48_perm_0"), val = tensor<int32, [4]>([1, 0, -1, -2])];
	tensor<int32, [4]> transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])];
	tensor<fp16, [1, 1, 512, 256]> transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = Va_1_cast_fp16)[name = string("transpose_101")];
	tensor<fp16, [8, 1, 512, 256]> tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")];
	tensor<int32, [5]> concat_6 = const()[name = string("concat_6"), val = tensor<int32, [5]>([8, 1, 1, 512, 256])];
	tensor<fp16, [8, 1, 1, 512, 256]> reshape_2_cast_fp16 = reshape(shape = concat_6, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")];
	tensor<int32, [5]> transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_7 = const()[name = string("concat_7"), val = tensor<int32, [4]>([-1, 1, 512, 256])];
	tensor<fp16, [1, 8, 1, 512, 256]> transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_100")];
	tensor<fp16, [8, 1, 512, 256]> reshape_3_cast_fp16 = reshape(shape = concat_7, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")];
	tensor<int32, [4]> Ve_1_perm_0 = const()[name = string("Ve_1_perm_0"), val = tensor<int32, [4]>([1, 0, -2, -1])];
	bool var_931_transpose_x_0 = const()[name = string("op_931_transpose_x_0"), val = bool(false)];
	bool var_931_transpose_y_0 = const()[name = string("op_931_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 256, 512]> transpose_48_cast_fp16 = transpose(perm = transpose_48_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_99")];
	tensor<fp16, [1, 8, 1, 512]> var_931_cast_fp16 = matmul(transpose_x = var_931_transpose_x_0, transpose_y = var_931_transpose_y_0, x = q_7_cast_fp16, y = transpose_48_cast_fp16)[name = string("op_931_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> var_938_cast_fp16 = add(x = var_931_cast_fp16, y = causal_mask)[name = string("op_938_cast_fp16")];
	int32 var_939 = const()[name = string("op_939"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 512]> var_941_cast_fp16 = softmax(axis = var_939, x = var_938_cast_fp16)[name = string("op_941_cast_fp16")];
	bool var_957_transpose_x_0 = const()[name = string("op_957_transpose_x_0"), val = bool(false)];
	bool var_957_transpose_y_0 = const()[name = string("op_957_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> Ve_1_cast_fp16 = transpose(perm = Ve_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_98")];
	tensor<fp16, [1, 8, 1, 256]> var_957_cast_fp16 = matmul(transpose_x = var_957_transpose_x_0, transpose_y = var_957_transpose_y_0, x = var_941_cast_fp16, y = Ve_1_cast_fp16)[name = string("op_957_cast_fp16")];
	tensor<int32, [4]> var_967 = const()[name = string("op_967"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_974 = const()[name = string("op_974"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_968 = transpose(perm = var_967, x = var_957_cast_fp16)[name = string("transpose_97")];
	tensor<fp16, [1, 1, 2048]> var_975 = reshape(shape = var_974, x = var_968)[name = string("op_975")];
	tensor<int32, [3]> var_979 = const()[name = string("op_979"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_0_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323546624))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325119552))))[name = string("squeeze_0_palettized")];
	string var_995_pad_type_0 = const()[name = string("op_995_pad_type_0"), val = string("valid")];
	int32 var_995_groups_0 = const()[name = string("op_995_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_995_strides_0 = const()[name = string("op_995_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_995_pad_0 = const()[name = string("op_995_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_995_dilations_0 = const()[name = string("op_995_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 2048, 1]> var_980 = transpose(perm = var_979, x = var_975)[name = string("transpose_96")];
	tensor<fp16, [1, 1536, 1]> var_995 = conv(dilations = var_995_dilations_0, groups = var_995_groups_0, pad = var_995_pad_0, pad_type = var_995_pad_type_0, strides = var_995_strides_0, weight = squeeze_0_palettized, x = var_980)[name = string("op_995")];
	tensor<int32, [3]> var_999 = const()[name = string("op_999"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1005 = const()[name = string("op_1005"), val = int32(-1)];
	fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_13 = transpose(perm = var_999, x = var_995)[name = string("transpose_95")];
	tensor<fp16, [1, 1, 1536]> var_1011_cast_fp16 = mul(x = x_13, y = const_10_promoted_to_fp16)[name = string("op_1011_cast_fp16")];
	bool input_15_interleave_0 = const()[name = string("input_15_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_15_cast_fp16 = concat(axis = var_1005, interleave = input_15_interleave_0, values = (x_13, var_1011_cast_fp16))[name = string("input_15_cast_fp16")];
	tensor<int32, [1]> normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1003_to_fp16 = const()[name = string("op_1003_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_1003_to_fp16, x = input_15_cast_fp16)[name = string("normed_13_cast_fp16")];
	tensor<int32, [2]> var_1016_split_sizes_0 = const()[name = string("op_1016_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1016_axis_0 = const()[name = string("op_1016_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1016_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1016_cast_fp16_1 = split(axis = var_1016_axis_0, split_sizes = var_1016_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_1016_cast_fp16")];
	tensor<fp16, [1536]> const_11_to_fp16 = const()[name = string("const_11_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325121152)))];
	tensor<fp16, [1, 1, 1536]> var_1019_cast_fp16 = mul(x = var_1016_cast_fp16_0, y = const_11_to_fp16)[name = string("op_1019_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_17_cast_fp16 = add(x = hidden_states, y = var_1019_cast_fp16)[name = string("x_17_cast_fp16")];
	int32 var_1027 = const()[name = string("op_1027"), val = int32(-1)];
	fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_1033_cast_fp16 = mul(x = x_17_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_1033_cast_fp16")];
	bool input_17_interleave_0 = const()[name = string("input_17_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_17_cast_fp16 = concat(axis = var_1027, interleave = input_17_interleave_0, values = (x_17_cast_fp16, var_1033_cast_fp16))[name = string("input_17_cast_fp16")];
	tensor<int32, [1]> normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1025_to_fp16 = const()[name = string("op_1025_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_1025_to_fp16, x = input_17_cast_fp16)[name = string("normed_17_cast_fp16")];
	tensor<int32, [2]> var_1038_split_sizes_0 = const()[name = string("op_1038_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1038_axis_0 = const()[name = string("op_1038_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1038_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1038_cast_fp16_1 = split(axis = var_1038_axis_0, split_sizes = var_1038_split_sizes_0, x = normed_17_cast_fp16)[name = string("op_1038_cast_fp16")];
	tensor<fp16, [1536]> const_13_to_fp16 = const()[name = string("const_13_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325124288)))];
	tensor<fp16, [1, 1, 1536]> var_1041_cast_fp16 = mul(x = var_1038_cast_fp16_0, y = const_13_to_fp16)[name = string("op_1041_cast_fp16")];
	tensor<int32, [3]> var_1051 = const()[name = string("op_1051"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_19_axes_0 = const()[name = string("input_19_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1052 = transpose(perm = var_1051, x = var_1041_cast_fp16)[name = string("transpose_94")];
	tensor<fp16, [1, 1536, 1, 1]> input_19 = expand_dims(axes = input_19_axes_0, x = var_1052)[name = string("input_19")];
	string var_1065_pad_type_0 = const()[name = string("op_1065_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1065_strides_0 = const()[name = string("op_1065_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1065_pad_0 = const()[name = string("op_1065_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1065_dilations_0 = const()[name = string("op_1065_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1065_groups_0 = const()[name = string("op_1065_groups_0"), val = int32(1)];
	tensor<fp16, [1, 6144, 1, 1]> var_1065 = conv(dilations = var_1065_dilations_0, groups = var_1065_groups_0, pad = var_1065_pad_0, pad_type = var_1065_pad_type_0, strides = var_1065_strides_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_19)[name = string("op_1065")];
	string var_1067_mode_0 = const()[name = string("op_1067_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 6144, 1, 1]> var_1067 = gelu(mode = var_1067_mode_0, x = var_1065)[name = string("op_1067")];
	string var_1078_pad_type_0 = const()[name = string("op_1078_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1078_strides_0 = const()[name = string("op_1078_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1078_pad_0 = const()[name = string("op_1078_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1078_dilations_0 = const()[name = string("op_1078_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1078_groups_0 = const()[name = string("op_1078_groups_0"), val = int32(1)];
	tensor<fp16, [1, 6144, 1, 1]> var_1078 = conv(dilations = var_1078_dilations_0, groups = var_1078_groups_0, pad = var_1078_pad_0, pad_type = var_1078_pad_type_0, strides = var_1078_strides_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_19)[name = string("op_1078")];
	tensor<fp16, [1, 6144, 1, 1]> input_21 = mul(x = var_1067, y = var_1078)[name = string("input_21")];
	string var_1090_pad_type_0 = const()[name = string("op_1090_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1090_strides_0 = const()[name = string("op_1090_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1090_pad_0 = const()[name = string("op_1090_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1090_dilations_0 = const()[name = string("op_1090_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1090_groups_0 = const()[name = string("op_1090_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> var_1090 = conv(dilations = var_1090_dilations_0, groups = var_1090_groups_0, pad = var_1090_pad_0, pad_type = var_1090_pad_type_0, strides = var_1090_strides_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_21)[name = string("op_1090")];
	tensor<int32, [1]> var_1092_axes_0 = const()[name = string("op_1092_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1092 = squeeze(axes = var_1092_axes_0, x = var_1090)[name = string("op_1092")];
	tensor<int32, [3]> var_1096 = const()[name = string("op_1096"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1102 = const()[name = string("op_1102"), val = int32(-1)];
	fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_21 = transpose(perm = var_1096, x = var_1092)[name = string("transpose_93")];
	tensor<fp16, [1, 1, 1536]> var_1108_cast_fp16 = mul(x = x_21, y = const_14_promoted_to_fp16)[name = string("op_1108_cast_fp16")];
	bool input_23_interleave_0 = const()[name = string("input_23_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_23_cast_fp16 = concat(axis = var_1102, interleave = input_23_interleave_0, values = (x_21, var_1108_cast_fp16))[name = string("input_23_cast_fp16")];
	tensor<int32, [1]> normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1100_to_fp16 = const()[name = string("op_1100_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_1100_to_fp16, x = input_23_cast_fp16)[name = string("normed_21_cast_fp16")];
	tensor<int32, [2]> var_1113_split_sizes_0 = const()[name = string("op_1113_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1113_axis_0 = const()[name = string("op_1113_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1113_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1113_cast_fp16_1 = split(axis = var_1113_axis_0, split_sizes = var_1113_split_sizes_0, x = normed_21_cast_fp16)[name = string("op_1113_cast_fp16")];
	tensor<fp16, [1536]> const_15_to_fp16 = const()[name = string("const_15_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325127424)))];
	tensor<fp16, [1, 1, 1536]> var_1116_cast_fp16 = mul(x = var_1113_cast_fp16_0, y = const_15_to_fp16)[name = string("op_1116_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_13_cast_fp16 = add(x = x_17_cast_fp16, y = var_1116_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
	tensor<fp16, [256]> linear_0_bias_0 = const()[name = string("linear_0_bias_0"), val = tensor<fp16, [256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325130560)))];
	tensor<fp16, [1, 1, 256]> var_1127 = linear(bias = linear_0_bias_0, weight = layers_0_per_layer_input_gate_weight_palettized, x = hidden_states_13_cast_fp16)[name = string("linear_0")];
	string gated_1_mode_0 = const()[name = string("gated_1_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 1, 256]> gated_1 = gelu(mode = gated_1_mode_0, x = var_1127)[name = string("gated_1")];
	tensor<int32, [3]> var_1144_begin_0 = const()[name = string("op_1144_begin_0"), val = tensor<int32, [3]>([0, 0, 3072])];
	tensor<int32, [3]> var_1144_end_0 = const()[name = string("op_1144_end_0"), val = tensor<int32, [3]>([1, 1, 3328])];
	tensor<bool, [3]> var_1144_end_mask_0 = const()[name = string("op_1144_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> var_1144_cast_fp16 = slice_by_index(begin = var_1144_begin_0, end = var_1144_end_0, end_mask = var_1144_end_mask_0, x = per_layer_combined)[name = string("op_1144_cast_fp16")];
	tensor<fp16, [1, 1, 256]> input_27_cast_fp16 = mul(x = gated_1, y = var_1144_cast_fp16)[name = string("input_27_cast_fp16")];
	tensor<fp16, [1536, 256]> layers_0_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325131136))), lut = tensor<fp16, [48, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325327808))))[name = string("layers_0_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1536]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325329408)))];
	tensor<fp16, [1, 1, 1536]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_27_cast_fp16)[name = string("linear_1_cast_fp16")];
	int32 var_1153 = const()[name = string("op_1153"), val = int32(-1)];
	fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_1159_cast_fp16 = mul(x = linear_1_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1159_cast_fp16")];
	bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_29_cast_fp16 = concat(axis = var_1153, interleave = input_29_interleave_0, values = (linear_1_cast_fp16, var_1159_cast_fp16))[name = string("input_29_cast_fp16")];
	tensor<int32, [1]> normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1151_to_fp16 = const()[name = string("op_1151_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_1151_to_fp16, x = input_29_cast_fp16)[name = string("normed_25_cast_fp16")];
	tensor<int32, [2]> var_1164_split_sizes_0 = const()[name = string("op_1164_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1164_axis_0 = const()[name = string("op_1164_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1164_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1164_cast_fp16_1 = split(axis = var_1164_axis_0, split_sizes = var_1164_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_1164_cast_fp16")];
	tensor<fp16, [1536]> const_17_to_fp16 = const()[name = string("const_17_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325332544)))];
	tensor<fp16, [1, 1, 1536]> var_1167_cast_fp16 = mul(x = var_1164_cast_fp16_0, y = const_17_to_fp16)[name = string("op_1167_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_17 = add(x = hidden_states_13_cast_fp16, y = var_1167_cast_fp16)[name = string("hidden_states_17")];
	tensor<fp16, [1]> layers_0_layer_scalar_to_fp16 = const()[name = string("layers_0_layer_scalar_to_fp16"), val = tensor<fp16, [1]>([0x1.4cp-2])];
	tensor<fp16, [1, 1, 1536]> x_29_cast_fp16 = mul(x = hidden_states_17, y = layers_0_layer_scalar_to_fp16)[name = string("x_29_cast_fp16")];
	int32 var_1175 = const()[name = string("op_1175"), val = int32(-1)];
	fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_1181_cast_fp16 = mul(x = x_29_cast_fp16, y = const_18_promoted_to_fp16)[name = string("op_1181_cast_fp16")];
	bool input_31_interleave_0 = const()[name = string("input_31_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_31_cast_fp16 = concat(axis = var_1175, interleave = input_31_interleave_0, values = (x_29_cast_fp16, var_1181_cast_fp16))[name = string("input_31_cast_fp16")];
	tensor<int32, [1]> normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1173_to_fp16 = const()[name = string("op_1173_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_1173_to_fp16, x = input_31_cast_fp16)[name = string("normed_29_cast_fp16")];
	tensor<int32, [2]> var_1186_split_sizes_0 = const()[name = string("op_1186_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1186_axis_0 = const()[name = string("op_1186_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1186_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1186_cast_fp16_1 = split(axis = var_1186_axis_0, split_sizes = var_1186_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_1186_cast_fp16")];
	tensor<fp16, [1536]> const_19_to_fp16 = const()[name = string("const_19_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325335680)))];
	tensor<fp16, [1, 1, 1536]> var_1189_cast_fp16 = mul(x = var_1186_cast_fp16_0, y = const_19_to_fp16)[name = string("op_1189_cast_fp16")];
	tensor<int32, [3]> var_1197 = const()[name = string("op_1197"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_1200_axes_0 = const()[name = string("op_1200_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1198_cast_fp16 = transpose(perm = var_1197, x = var_1189_cast_fp16)[name = string("transpose_92")];
	tensor<fp16, [1, 1536, 1, 1]> var_1200_cast_fp16 = expand_dims(axes = var_1200_axes_0, x = var_1198_cast_fp16)[name = string("op_1200_cast_fp16")];
	string var_1216_pad_type_0 = const()[name = string("op_1216_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1216_strides_0 = const()[name = string("op_1216_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1216_pad_0 = const()[name = string("op_1216_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1216_dilations_0 = const()[name = string("op_1216_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1216_groups_0 = const()[name = string("op_1216_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_1216 = conv(dilations = var_1216_dilations_0, groups = var_1216_groups_0, pad = var_1216_pad_0, pad_type = var_1216_pad_type_0, strides = var_1216_strides_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = var_1200_cast_fp16)[name = string("op_1216")];
	tensor<int32, [4]> var_1221 = const()[name = string("op_1221"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_1222 = reshape(shape = var_1221, x = var_1216)[name = string("op_1222")];
	tensor<int32, [4]> var_1227 = const()[name = string("op_1227"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_1237 = const()[name = string("op_1237"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_1228 = transpose(perm = var_1227, x = var_1222)[name = string("transpose_91")];
	tensor<fp16, [1, 8, 256]> x_33 = reshape(shape = var_1237, x = var_1228)[name = string("x_33")];
	int32 var_1243 = const()[name = string("op_1243"), val = int32(-1)];
	fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_1249_cast_fp16 = mul(x = x_33, y = const_20_promoted_to_fp16)[name = string("op_1249_cast_fp16")];
	bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_35_cast_fp16 = concat(axis = var_1243, interleave = input_35_interleave_0, values = (x_33, var_1249_cast_fp16))[name = string("input_35_cast_fp16")];
	tensor<int32, [1]> normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1241_to_fp16 = const()[name = string("op_1241_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_1241_to_fp16, x = input_35_cast_fp16)[name = string("normed_33_cast_fp16")];
	tensor<int32, [2]> var_1254_split_sizes_0 = const()[name = string("op_1254_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_1254_axis_0 = const()[name = string("op_1254_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_1254_cast_fp16_0, tensor<fp16, [1, 8, 256]> var_1254_cast_fp16_1 = split(axis = var_1254_axis_0, split_sizes = var_1254_split_sizes_0, x = normed_33_cast_fp16)[name = string("op_1254_cast_fp16")];
	tensor<fp16, [256]> const_21_to_fp16 = const()[name = string("const_21_to_fp16"), val = tensor<fp16, [256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325338816)))];
	tensor<fp16, [1, 8, 256]> var_1257_cast_fp16 = mul(x = var_1254_cast_fp16_0, y = const_21_to_fp16)[name = string("op_1257_cast_fp16")];
	tensor<int32, [4]> var_1263 = const()[name = string("op_1263"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_11 = reshape(shape = var_1263, x = var_1257_cast_fp16)[name = string("q_11")];
	tensor<fp16, [1, 8, 1, 256]> var_1265_cast_fp16 = mul(x = q_11, y = cos_s)[name = string("op_1265_cast_fp16")];
	tensor<int32, [2]> var_1266_split_sizes_0 = const()[name = string("op_1266_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_1266_axis_0 = const()[name = string("op_1266_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_1266_0, tensor<fp16, [1, 8, 1, 128]> var_1266_1 = split(axis = var_1266_axis_0, split_sizes = var_1266_split_sizes_0, x = q_11)[name = string("op_1266")];
	fp16 const_22_promoted = const()[name = string("const_22_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_1268 = mul(x = var_1266_1, y = const_22_promoted)[name = string("op_1268")];
	int32 var_1270 = const()[name = string("op_1270"), val = int32(-1)];
	bool var_1271_interleave_0 = const()[name = string("op_1271_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_1271 = concat(axis = var_1270, interleave = var_1271_interleave_0, values = (var_1268, var_1266_0))[name = string("op_1271")];
	tensor<fp16, [1, 8, 1, 256]> var_1272_cast_fp16 = mul(x = var_1271, y = sin_s)[name = string("op_1272_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_15_cast_fp16 = add(x = var_1265_cast_fp16, y = var_1272_cast_fp16)[name = string("q_15_cast_fp16")];
	string var_1285_pad_type_0 = const()[name = string("op_1285_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1285_strides_0 = const()[name = string("op_1285_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1285_pad_0 = const()[name = string("op_1285_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1285_dilations_0 = const()[name = string("op_1285_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1285_groups_0 = const()[name = string("op_1285_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 1]> var_1285 = conv(dilations = var_1285_dilations_0, groups = var_1285_groups_0, pad = var_1285_pad_0, pad_type = var_1285_pad_type_0, strides = var_1285_strides_0, weight = layers_1_self_attn_k_proj_weight_palettized, x = var_1200_cast_fp16)[name = string("op_1285")];
	tensor<int32, [4]> var_1290 = const()[name = string("op_1290"), val = tensor<int32, [4]>([1, 1, 256, 1])];
	tensor<fp16, [1, 1, 256, 1]> var_1291 = reshape(shape = var_1290, x = var_1285)[name = string("op_1291")];
	tensor<int32, [4]> var_1296 = const()[name = string("op_1296"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	string var_1313_pad_type_0 = const()[name = string("op_1313_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1313_strides_0 = const()[name = string("op_1313_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1313_pad_0 = const()[name = string("op_1313_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1313_dilations_0 = const()[name = string("op_1313_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1313_groups_0 = const()[name = string("op_1313_groups_0"), val = int32(1)];
	tensor<fp16, [1, 256, 1, 1]> var_1313 = conv(dilations = var_1313_dilations_0, groups = var_1313_groups_0, pad = var_1313_pad_0, pad_type = var_1313_pad_type_0, strides = var_1313_strides_0, weight = layers_1_self_attn_v_proj_weight_palettized, x = var_1200_cast_fp16)[name = string("op_1313")];
	tensor<int32, [4]> var_1318 = const()[name = string("op_1318"), val = tensor<int32, [4]>([1, 1, 256, 1])];
	tensor<fp16, [1, 1, 256, 1]> var_1319 = reshape(shape = var_1318, x = var_1313)[name = string("op_1319")];
	tensor<int32, [4]> var_1324 = const()[name = string("op_1324"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_1334 = const()[name = string("op_1334"), val = tensor<int32, [3]>([1, 1, 256])];
	tensor<fp16, [1, 1, 1, 256]> var_1297 = transpose(perm = var_1296, x = var_1291)[name = string("transpose_90")];
	tensor<fp16, [1, 1, 256]> x_37 = reshape(shape = var_1334, x = var_1297)[name = string("x_37")];
	int32 var_1340 = const()[name = string("op_1340"), val = int32(-1)];
	fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 256]> var_1346_cast_fp16 = mul(x = x_37, y = const_23_promoted_to_fp16)[name = string("op_1346_cast_fp16")];
	bool input_37_interleave_0 = const()[name = string("input_37_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 512]> input_37_cast_fp16 = concat(axis = var_1340, interleave = input_37_interleave_0, values = (x_37, var_1346_cast_fp16))[name = string("input_37_cast_fp16")];
	tensor<int32, [1]> normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1338_to_fp16 = const()[name = string("op_1338_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 512]> normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_1338_to_fp16, x = input_37_cast_fp16)[name = string("normed_37_cast_fp16")];
	tensor<int32, [2]> var_1351_split_sizes_0 = const()[name = string("op_1351_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_1351_axis_0 = const()[name = string("op_1351_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 256]> var_1351_cast_fp16_0, tensor<fp16, [1, 1, 256]> var_1351_cast_fp16_1 = split(axis = var_1351_axis_0, split_sizes = var_1351_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_1351_cast_fp16")];
	tensor<fp16, [256]> const_24_to_fp16 = const()[name = string("const_24_to_fp16"), val = tensor<fp16, [256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325339392)))];
	tensor<fp16, [1, 1, 256]> var_1354_cast_fp16 = mul(x = var_1351_cast_fp16_0, y = const_24_to_fp16)[name = string("op_1354_cast_fp16")];
	tensor<int32, [4]> var_1360 = const()[name = string("op_1360"), val = tensor<int32, [4]>([1, 1, 1, 256])];
	tensor<fp16, [1, 1, 1, 256]> q_13 = reshape(shape = var_1360, x = var_1354_cast_fp16)[name = string("q_13")];
	fp16 var_1367_promoted_to_fp16 = const()[name = string("op_1367_promoted_to_fp16"), val = fp16(0x1p+1)];
	tensor<fp16, [1, 1, 1, 256]> var_1325 = transpose(perm = var_1324, x = var_1319)[name = string("transpose_89")];
	tensor<fp16, [1, 1, 1, 256]> var_1368_cast_fp16 = pow(x = var_1325, y = var_1367_promoted_to_fp16)[name = string("op_1368_cast_fp16")];
	tensor<int32, [1]> var_1373_axes_0 = const()[name = string("op_1373_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_1373_keep_dims_0 = const()[name = string("op_1373_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 1, 1, 1]> var_1373_cast_fp16 = reduce_mean(axes = var_1373_axes_0, keep_dims = var_1373_keep_dims_0, x = var_1368_cast_fp16)[name = string("op_1373_cast_fp16")];
	fp16 var_1375_to_fp16 = const()[name = string("op_1375_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 1, 1]> mean_sq_3_cast_fp16 = add(x = var_1373_cast_fp16, y = var_1375_to_fp16)[name = string("mean_sq_3_cast_fp16")];
	fp16 var_1382_to_fp16 = const()[name = string("op_1382_to_fp16"), val = fp16(-0x1p-1)];
	tensor<fp16, [1, 1, 1, 1]> var_1383_cast_fp16 = pow(x = mean_sq_3_cast_fp16, y = var_1382_to_fp16)[name = string("op_1383_cast_fp16")];
	tensor<fp16, [1, 1, 1, 256]> var_1384_cast_fp16 = mul(x = var_1325, y = var_1383_cast_fp16)[name = string("op_1384_cast_fp16")];
	tensor<fp16, [1, 1, 1, 256]> var_1390_cast_fp16 = mul(x = q_13, y = cos_s)[name = string("op_1390_cast_fp16")];
	tensor<int32, [2]> var_1391_split_sizes_0 = const()[name = string("op_1391_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_1391_axis_0 = const()[name = string("op_1391_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1, 128]> var_1391_0, tensor<fp16, [1, 1, 1, 128]> var_1391_1 = split(axis = var_1391_axis_0, split_sizes = var_1391_split_sizes_0, x = q_13)[name = string("op_1391")];
	fp16 const_25_promoted = const()[name = string("const_25_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1, 128]> var_1393 = mul(x = var_1391_1, y = const_25_promoted)[name = string("op_1393")];
	int32 var_1395 = const()[name = string("op_1395"), val = int32(-1)];
	bool var_1396_interleave_0 = const()[name = string("op_1396_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 1, 256]> var_1396 = concat(axis = var_1395, interleave = var_1396_interleave_0, values = (var_1393, var_1391_0))[name = string("op_1396")];
	tensor<fp16, [1, 1, 1, 256]> var_1397_cast_fp16 = mul(x = var_1396, y = sin_s)[name = string("op_1397_cast_fp16")];
	tensor<fp16, [1, 1, 1, 256]> input_39_cast_fp16 = add(x = var_1390_cast_fp16, y = var_1397_cast_fp16)[name = string("input_39_cast_fp16")];
	tensor<int32, [4]> var_1402_begin_0 = const()[name = string("op_1402_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
	tensor<int32, [4]> var_1402_end_0 = const()[name = string("op_1402_end_0"), val = tensor<int32, [4]>([2, 1, 512, 512])];
	tensor<bool, [4]> var_1402_end_mask_0 = const()[name = string("op_1402_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_1402_squeeze_mask_0 = const()[name = string("op_1402_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [1, 512, 512]> var_1402_cast_fp16 = slice_by_index(begin = var_1402_begin_0, end = var_1402_end_0, end_mask = var_1402_end_mask_0, squeeze_mask = var_1402_squeeze_mask_0, x = coreml_update_state_7)[name = string("op_1402_cast_fp16")];
	tensor<int32, [1]> Kc_3_axes_0 = const()[name = string("Kc_3_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1, 512, 512]> Kc_3_cast_fp16 = expand_dims(axes = Kc_3_axes_0, x = var_1402_cast_fp16)[name = string("Kc_3_cast_fp16")];
	tensor<int32, [4]> var_1407_begin_0 = const()[name = string("op_1407_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
	tensor<int32, [4]> var_1407_end_0 = const()[name = string("op_1407_end_0"), val = tensor<int32, [4]>([14, 1, 512, 512])];
	tensor<bool, [4]> var_1407_end_mask_0 = const()[name = string("op_1407_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_1407_squeeze_mask_0 = const()[name = string("op_1407_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [1, 512, 512]> var_1407_cast_fp16 = slice_by_index(begin = var_1407_begin_0, end = var_1407_end_0, end_mask = var_1407_end_mask_0, squeeze_mask = var_1407_squeeze_mask_0, x = coreml_update_state_7)[name = string("op_1407_cast_fp16")];
	tensor<int32, [1]> Vc_3_axes_0 = const()[name = string("Vc_3_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1, 512, 512]> Vc_3_cast_fp16 = expand_dims(axes = Vc_3_axes_0, x = var_1407_cast_fp16)[name = string("Vc_3_cast_fp16")];
	tensor<int32, [8]> kp_pad_0 = const()[name = string("kp_pad_0"), val = tensor<int32, [8]>([0, 0, 0, 0, 0, 0, 0, 256])];
	string kp_mode_0 = const()[name = string("kp_mode_0"), val = string("constant")];
	fp16 const_26_to_fp16 = const()[name = string("const_26_to_fp16"), val = fp16(0x0p+0)];
	tensor<fp16, [1, 1, 1, 512]> kp_cast_fp16 = pad(constant_val = const_26_to_fp16, mode = kp_mode_0, pad = kp_pad_0, x = input_39_cast_fp16)[name = string("kp_cast_fp16")];
	tensor<int32, [8]> vp_pad_0 = const()[name = string("vp_pad_0"), val = tensor<int32, [8]>([0, 0, 0, 0, 0, 0, 0, 256])];
	string vp_mode_0 = const()[name = string("vp_mode_0"), val = string("constant")];
	fp16 const_27_to_fp16 = const()[name = string("const_27_to_fp16"), val = fp16(0x0p+0)];
	tensor<fp16, [1, 1, 1, 512]> vp_cast_fp16 = pad(constant_val = const_27_to_fp16, mode = vp_mode_0, pad = vp_pad_0, x = var_1384_cast_fp16)[name = string("vp_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> var_1425_cast_fp16 = mul(x = Kc_3_cast_fp16, y = var_872_cast_fp16)[name = string("op_1425_cast_fp16")];
	tensor<int32, [4]> var_1426_reps_0 = const()[name = string("op_1426_reps_0"), val = tensor<int32, [4]>([1, 1, 512, 1])];
	tensor<fp16, [1, 1, 512, 512]> var_1426_cast_fp16 = tile(reps = var_1426_reps_0, x = kp_cast_fp16)[name = string("op_1426_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> var_1427_cast_fp16 = mul(x = var_1426_cast_fp16, y = update_mask)[name = string("op_1427_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> Kn_3_cast_fp16 = add(x = var_1425_cast_fp16, y = var_1427_cast_fp16)[name = string("Kn_3_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> var_1433_cast_fp16 = mul(x = Vc_3_cast_fp16, y = var_872_cast_fp16)[name = string("op_1433_cast_fp16")];
	tensor<int32, [4]> var_1434_reps_0 = const()[name = string("op_1434_reps_0"), val = tensor<int32, [4]>([1, 1, 512, 1])];
	tensor<fp16, [1, 1, 512, 512]> var_1434_cast_fp16 = tile(reps = var_1434_reps_0, x = vp_cast_fp16)[name = string("op_1434_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> var_1435_cast_fp16 = mul(x = var_1434_cast_fp16, y = update_mask)[name = string("op_1435_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> Vn_3_cast_fp16 = add(x = var_1433_cast_fp16, y = var_1435_cast_fp16)[name = string("Vn_3_cast_fp16")];
	tensor<int32, [1]> var_1439_axes_0 = const()[name = string("op_1439_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 512, 512]> var_1439_cast_fp16 = squeeze(axes = var_1439_axes_0, x = Kn_3_cast_fp16)[name = string("op_1439_cast_fp16")];
	tensor<int32, [4]> concat_8 = const()[name = string("concat_8"), val = tensor<int32, [4]>([1, 0, 0, 0])];
	tensor<int32, [4]> concat_9 = const()[name = string("concat_9"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [24, 1, 512, 512]> kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_8, begin_mask = kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_9, end_mask = kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_3_stride_0, update = var_1439_cast_fp16, x = coreml_update_state_7)[name = string("kv_cache_0_internal_tensor_assign_3_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_3_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_8_write_state")];
	tensor<fp16, [24, 1, 512, 512]> coreml_update_state_8 = read_state(input = kv_cache_0)[name = string("coreml_update_state_8")];
	tensor<int32, [1]> var_1446_axes_0 = const()[name = string("op_1446_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 512, 512]> var_1446_cast_fp16 = squeeze(axes = var_1446_axes_0, x = Vn_3_cast_fp16)[name = string("op_1446_cast_fp16")];
	tensor<int32, [4]> concat_10 = const()[name = string("concat_10"), val = tensor<int32, [4]>([13, 0, 0, 0])];
	tensor<int32, [4]> concat_11 = const()[name = string("concat_11"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [24, 1, 512, 512]> kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_10, begin_mask = kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_11, end_mask = kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_4_stride_0, update = var_1446_cast_fp16, x = coreml_update_state_8)[name = string("kv_cache_0_internal_tensor_assign_4_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_4_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_9_write_state")];
	tensor<fp16, [24, 1, 512, 512]> coreml_update_state_9 = read_state(input = kv_cache_0)[name = string("coreml_update_state_9")];
	tensor<int32, [4]> Ka_3_begin_0 = const()[name = string("Ka_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> Ka_3_end_0 = const()[name = string("Ka_3_end_0"), val = tensor<int32, [4]>([1, 1, 512, 256])];
	tensor<bool, [4]> Ka_3_end_mask_0 = const()[name = string("Ka_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
	tensor<fp16, [1, 1, 512, 256]> kv13_k = slice_by_index(begin = Ka_3_begin_0, end = Ka_3_end_0, end_mask = Ka_3_end_mask_0, x = Kn_3_cast_fp16)[name = string("Ka_3_cast_fp16")];
	tensor<int32, [4]> Va_3_begin_0 = const()[name = string("Va_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> Va_3_end_0 = const()[name = string("Va_3_end_0"), val = tensor<int32, [4]>([1, 1, 512, 256])];
	tensor<bool, [4]> Va_3_end_mask_0 = const()[name = string("Va_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
	tensor<fp16, [1, 1, 512, 256]> kv13_v = slice_by_index(begin = Va_3_begin_0, end = Va_3_end_0, end_mask = Va_3_end_mask_0, x = Vn_3_cast_fp16)[name = string("Va_3_cast_fp16")];
	tensor<int32, [4]> transpose_4_perm_0 = const()[name = string("transpose_4_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_2_reps_0 = const()[name = string("tile_2_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])];
	tensor<fp16, [1, 1, 512, 256]> transpose_4_cast_fp16 = transpose(perm = transpose_4_perm_0, x = kv13_k)[name = string("transpose_88")];
	tensor<fp16, [8, 1, 512, 256]> tile_2_cast_fp16 = tile(reps = tile_2_reps_0, x = transpose_4_cast_fp16)[name = string("tile_2_cast_fp16")];
	tensor<int32, [5]> concat_12 = const()[name = string("concat_12"), val = tensor<int32, [5]>([8, 1, 1, 512, 256])];
	tensor<fp16, [8, 1, 1, 512, 256]> reshape_4_cast_fp16 = reshape(shape = concat_12, x = tile_2_cast_fp16)[name = string("reshape_4_cast_fp16")];
	tensor<int32, [5]> transpose_5_perm_0 = const()[name = string("transpose_5_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_13 = const()[name = string("concat_13"), val = tensor<int32, [4]>([-1, 1, 512, 256])];
	tensor<fp16, [1, 8, 1, 512, 256]> transpose_5_cast_fp16 = transpose(perm = transpose_5_perm_0, x = reshape_4_cast_fp16)[name = string("transpose_87")];
	tensor<fp16, [8, 1, 512, 256]> reshape_5_cast_fp16 = reshape(shape = concat_13, x = transpose_5_cast_fp16)[name = string("reshape_5_cast_fp16")];
	tensor<int32, [4]> transpose_49_perm_0 = const()[name = string("transpose_49_perm_0"), val = tensor<int32, [4]>([1, 0, -1, -2])];
	tensor<int32, [4]> transpose_6_perm_0 = const()[name = string("transpose_6_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_3_reps_0 = const()[name = string("tile_3_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])];
	tensor<fp16, [1, 1, 512, 256]> transpose_6_cast_fp16 = transpose(perm = transpose_6_perm_0, x = kv13_v)[name = string("transpose_86")];
	tensor<fp16, [8, 1, 512, 256]> tile_3_cast_fp16 = tile(reps = tile_3_reps_0, x = transpose_6_cast_fp16)[name = string("tile_3_cast_fp16")];
	tensor<int32, [5]> concat_14 = const()[name = string("concat_14"), val = tensor<int32, [5]>([8, 1, 1, 512, 256])];
	tensor<fp16, [8, 1, 1, 512, 256]> reshape_6_cast_fp16 = reshape(shape = concat_14, x = tile_3_cast_fp16)[name = string("reshape_6_cast_fp16")];
	tensor<int32, [5]> transpose_7_perm_0 = const()[name = string("transpose_7_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_15 = const()[name = string("concat_15"), val = tensor<int32, [4]>([-1, 1, 512, 256])];
	tensor<fp16, [1, 8, 1, 512, 256]> transpose_7_cast_fp16 = transpose(perm = transpose_7_perm_0, x = reshape_6_cast_fp16)[name = string("transpose_85")];
	tensor<fp16, [8, 1, 512, 256]> reshape_7_cast_fp16 = reshape(shape = concat_15, x = transpose_7_cast_fp16)[name = string("reshape_7_cast_fp16")];
	tensor<int32, [4]> Ve_3_perm_0 = const()[name = string("Ve_3_perm_0"), val = tensor<int32, [4]>([1, 0, -2, -1])];
	bool var_1493_transpose_x_0 = const()[name = string("op_1493_transpose_x_0"), val = bool(false)];
	bool var_1493_transpose_y_0 = const()[name = string("op_1493_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 256, 512]> transpose_49_cast_fp16 = transpose(perm = transpose_49_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_84")];
	tensor<fp16, [1, 8, 1, 512]> var_1493_cast_fp16 = matmul(transpose_x = var_1493_transpose_x_0, transpose_y = var_1493_transpose_y_0, x = q_15_cast_fp16, y = transpose_49_cast_fp16)[name = string("op_1493_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> var_1500_cast_fp16 = add(x = var_1493_cast_fp16, y = causal_mask)[name = string("op_1500_cast_fp16")];
	int32 var_1501 = const()[name = string("op_1501"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 512]> var_1503_cast_fp16 = softmax(axis = var_1501, x = var_1500_cast_fp16)[name = string("op_1503_cast_fp16")];
	bool var_1519_transpose_x_0 = const()[name = string("op_1519_transpose_x_0"), val = bool(false)];
	bool var_1519_transpose_y_0 = const()[name = string("op_1519_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 256]> Ve_3_cast_fp16 = transpose(perm = Ve_3_perm_0, x = reshape_7_cast_fp16)[name = string("transpose_83")];
	tensor<fp16, [1, 8, 1, 256]> var_1519_cast_fp16 = matmul(transpose_x = var_1519_transpose_x_0, transpose_y = var_1519_transpose_y_0, x = var_1503_cast_fp16, y = Ve_3_cast_fp16)[name = string("op_1519_cast_fp16")];
	tensor<int32, [4]> var_1529 = const()[name = string("op_1529"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_1536 = const()[name = string("op_1536"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_1530 = transpose(perm = var_1529, x = var_1519_cast_fp16)[name = string("transpose_82")];
	tensor<fp16, [1, 1, 2048]> var_1537 = reshape(shape = var_1536, x = var_1530)[name = string("op_1537")];
	tensor<int32, [3]> var_1541 = const()[name = string("op_1541"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_1_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325339968))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326912896))))[name = string("squeeze_1_palettized")];
	string var_1557_pad_type_0 = const()[name = string("op_1557_pad_type_0"), val = string("valid")];
	int32 var_1557_groups_0 = const()[name = string("op_1557_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_1557_strides_0 = const()[name = string("op_1557_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_1557_pad_0 = const()[name = string("op_1557_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_1557_dilations_0 = const()[name = string("op_1557_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 2048, 1]> var_1542 = transpose(perm = var_1541, x = var_1537)[name = string("transpose_81")];
	tensor<fp16, [1, 1536, 1]> var_1557 = conv(dilations = var_1557_dilations_0, groups = var_1557_groups_0, pad = var_1557_pad_0, pad_type = var_1557_pad_type_0, strides = var_1557_strides_0, weight = squeeze_1_palettized, x = var_1542)[name = string("op_1557")];
	tensor<int32, [3]> var_1561 = const()[name = string("op_1561"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1567 = const()[name = string("op_1567"), val = int32(-1)];
	fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_43 = transpose(perm = var_1561, x = var_1557)[name = string("transpose_80")];
	tensor<fp16, [1, 1, 1536]> var_1573_cast_fp16 = mul(x = x_43, y = const_28_promoted_to_fp16)[name = string("op_1573_cast_fp16")];
	bool input_45_interleave_0 = const()[name = string("input_45_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_45_cast_fp16 = concat(axis = var_1567, interleave = input_45_interleave_0, values = (x_43, var_1573_cast_fp16))[name = string("input_45_cast_fp16")];
	tensor<int32, [1]> normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1565_to_fp16 = const()[name = string("op_1565_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_1565_to_fp16, x = input_45_cast_fp16)[name = string("normed_41_cast_fp16")];
	tensor<int32, [2]> var_1578_split_sizes_0 = const()[name = string("op_1578_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1578_axis_0 = const()[name = string("op_1578_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1578_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1578_cast_fp16_1 = split(axis = var_1578_axis_0, split_sizes = var_1578_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_1578_cast_fp16")];
	tensor<fp16, [1536]> const_29_to_fp16 = const()[name = string("const_29_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326914496)))];
	tensor<fp16, [1, 1, 1536]> var_1581_cast_fp16 = mul(x = var_1578_cast_fp16_0, y = const_29_to_fp16)[name = string("op_1581_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_47_cast_fp16 = add(x = x_29_cast_fp16, y = var_1581_cast_fp16)[name = string("x_47_cast_fp16")];
	int32 var_1588 = const()[name = string("op_1588"), val = int32(-1)];
	fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_1594_cast_fp16 = mul(x = x_47_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_1594_cast_fp16")];
	bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_47_cast_fp16 = concat(axis = var_1588, interleave = input_47_interleave_0, values = (x_47_cast_fp16, var_1594_cast_fp16))[name = string("input_47_cast_fp16")];
	tensor<int32, [1]> normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1586_to_fp16 = const()[name = string("op_1586_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_1586_to_fp16, x = input_47_cast_fp16)[name = string("normed_45_cast_fp16")];
	tensor<int32, [2]> var_1599_split_sizes_0 = const()[name = string("op_1599_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1599_axis_0 = const()[name = string("op_1599_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1599_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1599_cast_fp16_1 = split(axis = var_1599_axis_0, split_sizes = var_1599_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_1599_cast_fp16")];
	tensor<fp16, [1536]> const_31_to_fp16 = const()[name = string("const_31_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326917632)))];
	tensor<fp16, [1, 1, 1536]> var_1602_cast_fp16 = mul(x = var_1599_cast_fp16_0, y = const_31_to_fp16)[name = string("op_1602_cast_fp16")];
	tensor<int32, [3]> var_1615 = const()[name = string("op_1615"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1616 = transpose(perm = var_1615, x = var_1602_cast_fp16)[name = string("transpose_79")];
	tensor<fp16, [1, 1536, 1, 1]> input_49 = expand_dims(axes = input_49_axes_0, x = var_1616)[name = string("input_49")];
	string var_1629_pad_type_0 = const()[name = string("op_1629_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1629_strides_0 = const()[name = string("op_1629_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1629_pad_0 = const()[name = string("op_1629_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1629_dilations_0 = const()[name = string("op_1629_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1629_groups_0 = const()[name = string("op_1629_groups_0"), val = int32(1)];
	tensor<fp16, [1, 6144, 1, 1]> var_1629 = conv(dilations = var_1629_dilations_0, groups = var_1629_groups_0, pad = var_1629_pad_0, pad_type = var_1629_pad_type_0, strides = var_1629_strides_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_49)[name = string("op_1629")];
	string var_1631_mode_0 = const()[name = string("op_1631_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 6144, 1, 1]> var_1631 = gelu(mode = var_1631_mode_0, x = var_1629)[name = string("op_1631")];
	string var_1642_pad_type_0 = const()[name = string("op_1642_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1642_strides_0 = const()[name = string("op_1642_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1642_pad_0 = const()[name = string("op_1642_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1642_dilations_0 = const()[name = string("op_1642_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1642_groups_0 = const()[name = string("op_1642_groups_0"), val = int32(1)];
	tensor<fp16, [1, 6144, 1, 1]> var_1642 = conv(dilations = var_1642_dilations_0, groups = var_1642_groups_0, pad = var_1642_pad_0, pad_type = var_1642_pad_type_0, strides = var_1642_strides_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_49)[name = string("op_1642")];
	tensor<fp16, [1, 6144, 1, 1]> input_51 = mul(x = var_1631, y = var_1642)[name = string("input_51")];
	string var_1654_pad_type_0 = const()[name = string("op_1654_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1654_strides_0 = const()[name = string("op_1654_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1654_pad_0 = const()[name = string("op_1654_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1654_dilations_0 = const()[name = string("op_1654_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1654_groups_0 = const()[name = string("op_1654_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> var_1654 = conv(dilations = var_1654_dilations_0, groups = var_1654_groups_0, pad = var_1654_pad_0, pad_type = var_1654_pad_type_0, strides = var_1654_strides_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_51)[name = string("op_1654")];
	tensor<int32, [1]> var_1656_axes_0 = const()[name = string("op_1656_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1656 = squeeze(axes = var_1656_axes_0, x = var_1654)[name = string("op_1656")];
	tensor<int32, [3]> var_1660 = const()[name = string("op_1660"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_1666 = const()[name = string("op_1666"), val = int32(-1)];
	fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_51 = transpose(perm = var_1660, x = var_1656)[name = string("transpose_78")];
	tensor<fp16, [1, 1, 1536]> var_1672_cast_fp16 = mul(x = x_51, y = const_32_promoted_to_fp16)[name = string("op_1672_cast_fp16")];
	bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_53_cast_fp16 = concat(axis = var_1666, interleave = input_53_interleave_0, values = (x_51, var_1672_cast_fp16))[name = string("input_53_cast_fp16")];
	tensor<int32, [1]> normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1664_to_fp16 = const()[name = string("op_1664_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_1664_to_fp16, x = input_53_cast_fp16)[name = string("normed_49_cast_fp16")];
	tensor<int32, [2]> var_1677_split_sizes_0 = const()[name = string("op_1677_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1677_axis_0 = const()[name = string("op_1677_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1677_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1677_cast_fp16_1 = split(axis = var_1677_axis_0, split_sizes = var_1677_split_sizes_0, x = normed_49_cast_fp16)[name = string("op_1677_cast_fp16")];
	tensor<fp16, [1536]> const_33_to_fp16 = const()[name = string("const_33_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326920768)))];
	tensor<fp16, [1, 1, 1536]> var_1680_cast_fp16 = mul(x = var_1677_cast_fp16_0, y = const_33_to_fp16)[name = string("op_1680_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_27_cast_fp16 = add(x = x_47_cast_fp16, y = var_1680_cast_fp16)[name = string("hidden_states_27_cast_fp16")];
	tensor<fp16, [1, 1, 256]> var_1691 = linear(bias = linear_0_bias_0, weight = layers_1_per_layer_input_gate_weight_palettized, x = hidden_states_27_cast_fp16)[name = string("linear_2")];
	string gated_3_mode_0 = const()[name = string("gated_3_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 1, 256]> gated_3 = gelu(mode = gated_3_mode_0, x = var_1691)[name = string("gated_3")];
	tensor<int32, [3]> var_1708_begin_0 = const()[name = string("op_1708_begin_0"), val = tensor<int32, [3]>([0, 0, 3328])];
	tensor<int32, [3]> var_1708_end_0 = const()[name = string("op_1708_end_0"), val = tensor<int32, [3]>([1, 1, 3584])];
	tensor<bool, [3]> var_1708_end_mask_0 = const()[name = string("op_1708_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> var_1708_cast_fp16 = slice_by_index(begin = var_1708_begin_0, end = var_1708_end_0, end_mask = var_1708_end_mask_0, x = per_layer_combined)[name = string("op_1708_cast_fp16")];
	tensor<fp16, [1, 1, 256]> input_57_cast_fp16 = mul(x = gated_3, y = var_1708_cast_fp16)[name = string("input_57_cast_fp16")];
	tensor<fp16, [1536, 256]> layers_1_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326923904))), lut = tensor<fp16, [48, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327120576))))[name = string("layers_1_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1, 1536]> linear_3_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_57_cast_fp16)[name = string("linear_3_cast_fp16")];
	int32 var_1717 = const()[name = string("op_1717"), val = int32(-1)];
	fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_1723_cast_fp16 = mul(x = linear_3_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_1723_cast_fp16")];
	bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_59_cast_fp16 = concat(axis = var_1717, interleave = input_59_interleave_0, values = (linear_3_cast_fp16, var_1723_cast_fp16))[name = string("input_59_cast_fp16")];
	tensor<int32, [1]> normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1715_to_fp16 = const()[name = string("op_1715_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_1715_to_fp16, x = input_59_cast_fp16)[name = string("normed_53_cast_fp16")];
	tensor<int32, [2]> var_1728_split_sizes_0 = const()[name = string("op_1728_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1728_axis_0 = const()[name = string("op_1728_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1728_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1728_cast_fp16_1 = split(axis = var_1728_axis_0, split_sizes = var_1728_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_1728_cast_fp16")];
	tensor<fp16, [1536]> const_35_to_fp16 = const()[name = string("const_35_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327122176)))];
	tensor<fp16, [1, 1, 1536]> var_1731_cast_fp16 = mul(x = var_1728_cast_fp16_0, y = const_35_to_fp16)[name = string("op_1731_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_31_cast_fp16 = add(x = hidden_states_27_cast_fp16, y = var_1731_cast_fp16)[name = string("hidden_states_31_cast_fp16")];
	tensor<fp16, [1]> layers_1_layer_scalar_to_fp16 = const()[name = string("layers_1_layer_scalar_to_fp16"), val = tensor<fp16, [1]>([0x1.6ap-4])];
	tensor<fp16, [1, 1, 1536]> x_59_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = layers_1_layer_scalar_to_fp16)[name = string("x_59_cast_fp16")];
	int32 var_1739 = const()[name = string("op_1739"), val = int32(-1)];
	fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_1745_cast_fp16 = mul(x = x_59_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_1745_cast_fp16")];
	bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_61_cast_fp16 = concat(axis = var_1739, interleave = input_61_interleave_0, values = (x_59_cast_fp16, var_1745_cast_fp16))[name = string("input_61_cast_fp16")];
	tensor<int32, [1]> normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1737_to_fp16 = const()[name = string("op_1737_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_1737_to_fp16, x = input_61_cast_fp16)[name = string("normed_57_cast_fp16")];
	tensor<int32, [2]> var_1750_split_sizes_0 = const()[name = string("op_1750_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_1750_axis_0 = const()[name = string("op_1750_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_1750_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1750_cast_fp16_1 = split(axis = var_1750_axis_0, split_sizes = var_1750_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_1750_cast_fp16")];
	tensor<fp16, [1536]> const_37_to_fp16 = const()[name = string("const_37_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327125312)))];
	tensor<fp16, [1, 1, 1536]> var_1753_cast_fp16 = mul(x = var_1750_cast_fp16_0, y = const_37_to_fp16)[name = string("op_1753_cast_fp16")];
	tensor<int32, [3]> var_1761 = const()[name = string("op_1761"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_1764_axes_0 = const()[name = string("op_1764_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_1762_cast_fp16 = transpose(perm = var_1761, x = var_1753_cast_fp16)[name = string("transpose_77")];
	tensor<fp16, [1, 1536, 1, 1]> var_1764_cast_fp16 = expand_dims(axes = var_1764_axes_0, x = var_1762_cast_fp16)[name = string("op_1764_cast_fp16")];
	string var_1780_pad_type_0 = const()[name = string("op_1780_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1780_strides_0 = const()[name = string("op_1780_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1780_pad_0 = const()[name = string("op_1780_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1780_dilations_0 = const()[name = string("op_1780_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1780_groups_0 = const()[name = string("op_1780_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4096, 1, 1]> var_1780 = conv(dilations = var_1780_dilations_0, groups = var_1780_groups_0, pad = var_1780_pad_0, pad_type = var_1780_pad_type_0, strides = var_1780_strides_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = var_1764_cast_fp16)[name = string("op_1780")];
	tensor<int32, [4]> var_1785 = const()[name = string("op_1785"), val = tensor<int32, [4]>([1, 8, 512, 1])];
	tensor<fp16, [1, 8, 512, 1]> var_1786 = reshape(shape = var_1785, x = var_1780)[name = string("op_1786")];
	tensor<int32, [4]> var_1791 = const()[name = string("op_1791"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_1801 = const()[name = string("op_1801"), val = tensor<int32, [3]>([1, 8, 512])];
	tensor<fp16, [1, 8, 1, 512]> var_1792 = transpose(perm = var_1791, x = var_1786)[name = string("transpose_76")];
	tensor<fp16, [1, 8, 512]> x_63 = reshape(shape = var_1801, x = var_1792)[name = string("x_63")];
	int32 var_1807 = const()[name = string("op_1807"), val = int32(-1)];
	fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 512]> var_1813_cast_fp16 = mul(x = x_63, y = const_38_promoted_to_fp16)[name = string("op_1813_cast_fp16")];
	bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1024]> input_65_cast_fp16 = concat(axis = var_1807, interleave = input_65_interleave_0, values = (x_63, var_1813_cast_fp16))[name = string("input_65_cast_fp16")];
	tensor<int32, [1]> normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1805_to_fp16 = const()[name = string("op_1805_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 1024]> normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_1805_to_fp16, x = input_65_cast_fp16)[name = string("normed_61_cast_fp16")];
	tensor<int32, [2]> var_1818_split_sizes_0 = const()[name = string("op_1818_split_sizes_0"), val = tensor<int32, [2]>([512, 512])];
	int32 var_1818_axis_0 = const()[name = string("op_1818_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 512]> var_1818_cast_fp16_0, tensor<fp16, [1, 8, 512]> var_1818_cast_fp16_1 = split(axis = var_1818_axis_0, split_sizes = var_1818_split_sizes_0, x = normed_61_cast_fp16)[name = string("op_1818_cast_fp16")];
	tensor<fp16, [512]> const_39_to_fp16 = const()[name = string("const_39_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327128448)))];
	tensor<fp16, [1, 8, 512]> var_1821_cast_fp16 = mul(x = var_1818_cast_fp16_0, y = const_39_to_fp16)[name = string("op_1821_cast_fp16")];
	tensor<int32, [4]> var_1827 = const()[name = string("op_1827"), val = tensor<int32, [4]>([1, 8, 1, 512])];
	tensor<fp16, [1, 8, 1, 512]> q_19 = reshape(shape = var_1827, x = var_1821_cast_fp16)[name = string("q_19")];
	tensor<fp16, [1, 8, 1, 512]> var_1829_cast_fp16 = mul(x = q_19, y = cos_f)[name = string("op_1829_cast_fp16")];
	tensor<int32, [2]> var_1830_split_sizes_0 = const()[name = string("op_1830_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_1830_axis_0 = const()[name = string("op_1830_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 256]> var_1830_0, tensor<fp16, [1, 8, 1, 256]> var_1830_1 = split(axis = var_1830_axis_0, split_sizes = var_1830_split_sizes_0, x = q_19)[name = string("op_1830")];
	fp16 const_40_promoted = const()[name = string("const_40_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 256]> var_1832 = mul(x = var_1830_1, y = const_40_promoted)[name = string("op_1832")];
	int32 var_1834 = const()[name = string("op_1834"), val = int32(-1)];
	bool var_1835_interleave_0 = const()[name = string("op_1835_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> var_1835 = concat(axis = var_1834, interleave = var_1835_interleave_0, values = (var_1832, var_1830_0))[name = string("op_1835")];
	tensor<fp16, [1, 8, 1, 512]> var_1836_cast_fp16 = mul(x = var_1835, y = sin_f)[name = string("op_1836_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> q_23_cast_fp16 = add(x = var_1829_cast_fp16, y = var_1836_cast_fp16)[name = string("q_23_cast_fp16")];
	string var_1849_pad_type_0 = const()[name = string("op_1849_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1849_strides_0 = const()[name = string("op_1849_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1849_pad_0 = const()[name = string("op_1849_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1849_dilations_0 = const()[name = string("op_1849_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1849_groups_0 = const()[name = string("op_1849_groups_0"), val = int32(1)];
	tensor<fp16, [1, 512, 1, 1]> var_1849 = conv(dilations = var_1849_dilations_0, groups = var_1849_groups_0, pad = var_1849_pad_0, pad_type = var_1849_pad_type_0, strides = var_1849_strides_0, weight = layers_2_self_attn_k_proj_weight_palettized, x = var_1764_cast_fp16)[name = string("op_1849")];
	tensor<int32, [4]> var_1854 = const()[name = string("op_1854"), val = tensor<int32, [4]>([1, 1, 512, 1])];
	tensor<fp16, [1, 1, 512, 1]> var_1855 = reshape(shape = var_1854, x = var_1849)[name = string("op_1855")];
	tensor<int32, [4]> var_1860 = const()[name = string("op_1860"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	string var_1877_pad_type_0 = const()[name = string("op_1877_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_1877_strides_0 = const()[name = string("op_1877_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_1877_pad_0 = const()[name = string("op_1877_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_1877_dilations_0 = const()[name = string("op_1877_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_1877_groups_0 = const()[name = string("op_1877_groups_0"), val = int32(1)];
	tensor<fp16, [1, 512, 1, 1]> var_1877 = conv(dilations = var_1877_dilations_0, groups = var_1877_groups_0, pad = var_1877_pad_0, pad_type = var_1877_pad_type_0, strides = var_1877_strides_0, weight = layers_2_self_attn_v_proj_weight_palettized, x = var_1764_cast_fp16)[name = string("op_1877")];
	tensor<int32, [4]> var_1882 = const()[name = string("op_1882"), val = tensor<int32, [4]>([1, 1, 512, 1])];
	tensor<fp16, [1, 1, 512, 1]> var_1883 = reshape(shape = var_1882, x = var_1877)[name = string("op_1883")];
	tensor<int32, [4]> var_1888 = const()[name = string("op_1888"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_1898 = const()[name = string("op_1898"), val = tensor<int32, [3]>([1, 1, 512])];
	tensor<fp16, [1, 1, 1, 512]> var_1861 = transpose(perm = var_1860, x = var_1855)[name = string("transpose_75")];
	tensor<fp16, [1, 1, 512]> x_67 = reshape(shape = var_1898, x = var_1861)[name = string("x_67")];
	int32 var_1904 = const()[name = string("op_1904"), val = int32(-1)];
	fp16 const_41_promoted_to_fp16 = const()[name = string("const_41_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 512]> var_1910_cast_fp16 = mul(x = x_67, y = const_41_promoted_to_fp16)[name = string("op_1910_cast_fp16")];
	bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 1024]> input_67_cast_fp16 = concat(axis = var_1904, interleave = input_67_interleave_0, values = (x_67, var_1910_cast_fp16))[name = string("input_67_cast_fp16")];
	tensor<int32, [1]> normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_1902_to_fp16 = const()[name = string("op_1902_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 1024]> normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_1902_to_fp16, x = input_67_cast_fp16)[name = string("normed_65_cast_fp16")];
	tensor<int32, [2]> var_1915_split_sizes_0 = const()[name = string("op_1915_split_sizes_0"), val = tensor<int32, [2]>([512, 512])];
	int32 var_1915_axis_0 = const()[name = string("op_1915_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 512]> var_1915_cast_fp16_0, tensor<fp16, [1, 1, 512]> var_1915_cast_fp16_1 = split(axis = var_1915_axis_0, split_sizes = var_1915_split_sizes_0, x = normed_65_cast_fp16)[name = string("op_1915_cast_fp16")];
	tensor<fp16, [512]> const_42_to_fp16 = const()[name = string("const_42_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327129536)))];
	tensor<fp16, [1, 1, 512]> var_1918_cast_fp16 = mul(x = var_1915_cast_fp16_0, y = const_42_to_fp16)[name = string("op_1918_cast_fp16")];
	tensor<int32, [4]> var_1924 = const()[name = string("op_1924"), val = tensor<int32, [4]>([1, 1, 1, 512])];
	tensor<fp16, [1, 1, 1, 512]> q_21 = reshape(shape = var_1924, x = var_1918_cast_fp16)[name = string("q_21")];
	fp16 var_1931_promoted_to_fp16 = const()[name = string("op_1931_promoted_to_fp16"), val = fp16(0x1p+1)];
	tensor<fp16, [1, 1, 1, 512]> var_1889 = transpose(perm = var_1888, x = var_1883)[name = string("transpose_74")];
	tensor<fp16, [1, 1, 1, 512]> var_1932_cast_fp16 = pow(x = var_1889, y = var_1931_promoted_to_fp16)[name = string("op_1932_cast_fp16")];
	tensor<int32, [1]> var_1937_axes_0 = const()[name = string("op_1937_axes_0"), val = tensor<int32, [1]>([-1])];
	bool var_1937_keep_dims_0 = const()[name = string("op_1937_keep_dims_0"), val = bool(true)];
	tensor<fp16, [1, 1, 1, 1]> var_1937_cast_fp16 = reduce_mean(axes = var_1937_axes_0, keep_dims = var_1937_keep_dims_0, x = var_1932_cast_fp16)[name = string("op_1937_cast_fp16")];
	fp16 var_1939_to_fp16 = const()[name = string("op_1939_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 1, 1]> mean_sq_cast_fp16 = add(x = var_1937_cast_fp16, y = var_1939_to_fp16)[name = string("mean_sq_cast_fp16")];
	fp16 var_1946_to_fp16 = const()[name = string("op_1946_to_fp16"), val = fp16(-0x1p-1)];
	tensor<fp16, [1, 1, 1, 1]> var_1947_cast_fp16 = pow(x = mean_sq_cast_fp16, y = var_1946_to_fp16)[name = string("op_1947_cast_fp16")];
	tensor<fp16, [1, 1, 1, 512]> var_1948_cast_fp16 = mul(x = var_1889, y = var_1947_cast_fp16)[name = string("op_1948_cast_fp16")];
	tensor<fp16, [1, 1, 1, 512]> var_1954_cast_fp16 = mul(x = q_21, y = cos_f)[name = string("op_1954_cast_fp16")];
	tensor<int32, [2]> var_1955_split_sizes_0 = const()[name = string("op_1955_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_1955_axis_0 = const()[name = string("op_1955_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1, 256]> var_1955_0, tensor<fp16, [1, 1, 1, 256]> var_1955_1 = split(axis = var_1955_axis_0, split_sizes = var_1955_split_sizes_0, x = q_21)[name = string("op_1955")];
	fp16 const_43_promoted = const()[name = string("const_43_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1, 256]> var_1957 = mul(x = var_1955_1, y = const_43_promoted)[name = string("op_1957")];
	int32 var_1959 = const()[name = string("op_1959"), val = int32(-1)];
	bool var_1960_interleave_0 = const()[name = string("op_1960_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 1, 512]> var_1960 = concat(axis = var_1959, interleave = var_1960_interleave_0, values = (var_1957, var_1955_0))[name = string("op_1960")];
	tensor<fp16, [1, 1, 1, 512]> var_1961_cast_fp16 = mul(x = var_1960, y = sin_f)[name = string("op_1961_cast_fp16")];
	tensor<fp16, [1, 1, 1, 512]> k_cast_fp16 = add(x = var_1954_cast_fp16, y = var_1961_cast_fp16)[name = string("k_cast_fp16")];
	tensor<int32, [4]> var_1966_begin_0 = const()[name = string("op_1966_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
	tensor<int32, [4]> var_1966_end_0 = const()[name = string("op_1966_end_0"), val = tensor<int32, [4]>([3, 1, 512, 512])];
	tensor<bool, [4]> var_1966_end_mask_0 = const()[name = string("op_1966_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_1966_squeeze_mask_0 = const()[name = string("op_1966_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [1, 512, 512]> var_1966_cast_fp16 = slice_by_index(begin = var_1966_begin_0, end = var_1966_end_0, end_mask = var_1966_end_mask_0, squeeze_mask = var_1966_squeeze_mask_0, x = coreml_update_state_9)[name = string("op_1966_cast_fp16")];
	tensor<int32, [1]> Kc_axes_0 = const()[name = string("Kc_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1, 512, 512]> Kc_cast_fp16 = expand_dims(axes = Kc_axes_0, x = var_1966_cast_fp16)[name = string("Kc_cast_fp16")];
	tensor<int32, [4]> var_1971_begin_0 = const()[name = string("op_1971_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
	tensor<int32, [4]> var_1971_end_0 = const()[name = string("op_1971_end_0"), val = tensor<int32, [4]>([15, 1, 512, 512])];
	tensor<bool, [4]> var_1971_end_mask_0 = const()[name = string("op_1971_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> var_1971_squeeze_mask_0 = const()[name = string("op_1971_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [1, 512, 512]> var_1971_cast_fp16 = slice_by_index(begin = var_1971_begin_0, end = var_1971_end_0, end_mask = var_1971_end_mask_0, squeeze_mask = var_1971_squeeze_mask_0, x = coreml_update_state_9)[name = string("op_1971_cast_fp16")];
	tensor<int32, [1]> Vc_axes_0 = const()[name = string("Vc_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 1, 512, 512]> Vc_cast_fp16 = expand_dims(axes = Vc_axes_0, x = var_1971_cast_fp16)[name = string("Vc_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> var_1977_cast_fp16 = mul(x = Kc_cast_fp16, y = var_872_cast_fp16)[name = string("op_1977_cast_fp16")];
	tensor<int32, [4]> var_1978_reps_0 = const()[name = string("op_1978_reps_0"), val = tensor<int32, [4]>([1, 1, 512, 1])];
	tensor<fp16, [1, 1, 512, 512]> var_1978_cast_fp16 = tile(reps = var_1978_reps_0, x = k_cast_fp16)[name = string("op_1978_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> var_1979_cast_fp16 = mul(x = var_1978_cast_fp16, y = update_mask)[name = string("op_1979_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> kv14_k = add(x = var_1977_cast_fp16, y = var_1979_cast_fp16)[name = string("Kn_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> var_1985_cast_fp16 = mul(x = Vc_cast_fp16, y = var_872_cast_fp16)[name = string("op_1985_cast_fp16")];
	tensor<int32, [4]> var_1986_reps_0 = const()[name = string("op_1986_reps_0"), val = tensor<int32, [4]>([1, 1, 512, 1])];
	tensor<fp16, [1, 1, 512, 512]> var_1986 = tile(reps = var_1986_reps_0, x = var_1948_cast_fp16)[name = string("op_1986")];
	tensor<fp16, [1, 1, 512, 512]> var_1987_cast_fp16 = mul(x = var_1986, y = update_mask)[name = string("op_1987_cast_fp16")];
	tensor<fp16, [1, 1, 512, 512]> kv14_v = add(x = var_1985_cast_fp16, y = var_1987_cast_fp16)[name = string("Vn_cast_fp16")];
	tensor<int32, [1]> var_1991_axes_0 = const()[name = string("op_1991_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 512, 512]> var_1991_cast_fp16 = squeeze(axes = var_1991_axes_0, x = kv14_k)[name = string("op_1991_cast_fp16")];
	tensor<int32, [4]> concat_16 = const()[name = string("concat_16"), val = tensor<int32, [4]>([2, 0, 0, 0])];
	tensor<int32, [4]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [24, 1, 512, 512]> kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_16, begin_mask = kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_17, end_mask = kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_5_stride_0, update = var_1991_cast_fp16, x = coreml_update_state_9)[name = string("kv_cache_0_internal_tensor_assign_5_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_5_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_10_write_state")];
	tensor<fp16, [24, 1, 512, 512]> coreml_update_state_10 = read_state(input = kv_cache_0)[name = string("coreml_update_state_10")];
	tensor<int32, [1]> var_1998_axes_0 = const()[name = string("op_1998_axes_0"), val = tensor<int32, [1]>([0])];
	tensor<fp16, [1, 512, 512]> var_1998_cast_fp16 = squeeze(axes = var_1998_axes_0, x = kv14_v)[name = string("op_1998_cast_fp16")];
	tensor<int32, [4]> concat_18 = const()[name = string("concat_18"), val = tensor<int32, [4]>([14, 0, 0, 0])];
	tensor<int32, [4]> concat_19 = const()[name = string("concat_19"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [4]> kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
	tensor<bool, [4]> kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
	tensor<fp16, [24, 1, 512, 512]> kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_18, begin_mask = kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_19, end_mask = kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = kv_cache_0_internal_tensor_assign_6_stride_0, update = var_1998_cast_fp16, x = coreml_update_state_10)[name = string("kv_cache_0_internal_tensor_assign_6_cast_fp16")];
	write_state(data = kv_cache_0_internal_tensor_assign_6_cast_fp16, input = kv_cache_0)[name = string("coreml_update_state_11_write_state")];
	tensor<int32, [4]> transpose_8_perm_0 = const()[name = string("transpose_8_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_4_reps_0 = const()[name = string("tile_4_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])];
	tensor<fp16, [1, 1, 512, 512]> transpose_8_cast_fp16 = transpose(perm = transpose_8_perm_0, x = kv14_k)[name = string("transpose_73")];
	tensor<fp16, [8, 1, 512, 512]> tile_4_cast_fp16 = tile(reps = tile_4_reps_0, x = transpose_8_cast_fp16)[name = string("tile_4_cast_fp16")];
	tensor<int32, [5]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [5]>([8, 1, 1, 512, 512])];
	tensor<fp16, [8, 1, 1, 512, 512]> reshape_8_cast_fp16 = reshape(shape = concat_20, x = tile_4_cast_fp16)[name = string("reshape_8_cast_fp16")];
	tensor<int32, [5]> transpose_9_perm_0 = const()[name = string("transpose_9_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_21 = const()[name = string("concat_21"), val = tensor<int32, [4]>([-1, 1, 512, 512])];
	tensor<fp16, [1, 8, 1, 512, 512]> transpose_9_cast_fp16 = transpose(perm = transpose_9_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_72")];
	tensor<fp16, [8, 1, 512, 512]> reshape_9_cast_fp16 = reshape(shape = concat_21, x = transpose_9_cast_fp16)[name = string("reshape_9_cast_fp16")];
	tensor<int32, [4]> transpose_50_perm_0 = const()[name = string("transpose_50_perm_0"), val = tensor<int32, [4]>([1, 0, -1, -2])];
	tensor<int32, [4]> transpose_10_perm_0 = const()[name = string("transpose_10_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
	tensor<int32, [4]> tile_5_reps_0 = const()[name = string("tile_5_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])];
	tensor<fp16, [1, 1, 512, 512]> transpose_10_cast_fp16 = transpose(perm = transpose_10_perm_0, x = kv14_v)[name = string("transpose_71")];
	tensor<fp16, [8, 1, 512, 512]> tile_5_cast_fp16 = tile(reps = tile_5_reps_0, x = transpose_10_cast_fp16)[name = string("tile_5_cast_fp16")];
	tensor<int32, [5]> concat_22 = const()[name = string("concat_22"), val = tensor<int32, [5]>([8, 1, 1, 512, 512])];
	tensor<fp16, [8, 1, 1, 512, 512]> reshape_10_cast_fp16 = reshape(shape = concat_22, x = tile_5_cast_fp16)[name = string("reshape_10_cast_fp16")];
	tensor<int32, [5]> transpose_11_perm_0 = const()[name = string("transpose_11_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])];
	tensor<int32, [4]> concat_23 = const()[name = string("concat_23"), val = tensor<int32, [4]>([-1, 1, 512, 512])];
	tensor<fp16, [1, 8, 1, 512, 512]> transpose_11_cast_fp16 = transpose(perm = transpose_11_perm_0, x = reshape_10_cast_fp16)[name = string("transpose_70")];
	tensor<fp16, [8, 1, 512, 512]> reshape_11_cast_fp16 = reshape(shape = concat_23, x = transpose_11_cast_fp16)[name = string("reshape_11_cast_fp16")];
	tensor<int32, [4]> Ve_5_perm_0 = const()[name = string("Ve_5_perm_0"), val = tensor<int32, [4]>([1, 0, -2, -1])];
	bool var_2045_transpose_x_0 = const()[name = string("op_2045_transpose_x_0"), val = bool(false)];
	bool var_2045_transpose_y_0 = const()[name = string("op_2045_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 512]> transpose_50_cast_fp16 = transpose(perm = transpose_50_perm_0, x = reshape_9_cast_fp16)[name = string("transpose_69")];
	tensor<fp16, [1, 8, 1, 512]> var_2045_cast_fp16 = matmul(transpose_x = var_2045_transpose_x_0, transpose_y = var_2045_transpose_y_0, x = q_23_cast_fp16, y = transpose_50_cast_fp16)[name = string("op_2045_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> var_2052_cast_fp16 = add(x = var_2045_cast_fp16, y = causal_mask)[name = string("op_2052_cast_fp16")];
	int32 var_2053 = const()[name = string("op_2053"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 512]> var_2055_cast_fp16 = softmax(axis = var_2053, x = var_2052_cast_fp16)[name = string("op_2055_cast_fp16")];
	bool var_2071_transpose_x_0 = const()[name = string("op_2071_transpose_x_0"), val = bool(false)];
	bool var_2071_transpose_y_0 = const()[name = string("op_2071_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512, 512]> Ve_5_cast_fp16 = transpose(perm = Ve_5_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_68")];
	tensor<fp16, [1, 8, 1, 512]> var_2071_cast_fp16 = matmul(transpose_x = var_2071_transpose_x_0, transpose_y = var_2071_transpose_y_0, x = var_2055_cast_fp16, y = Ve_5_cast_fp16)[name = string("op_2071_cast_fp16")];
	tensor<int32, [4]> var_2081 = const()[name = string("op_2081"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_2088 = const()[name = string("op_2088"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 512]> var_2082 = transpose(perm = var_2081, x = var_2071_cast_fp16)[name = string("transpose_67")];
	tensor<fp16, [1, 1, 4096]> var_2089 = reshape(shape = var_2088, x = var_2082)[name = string("op_2089")];
	tensor<int32, [3]> var_2093 = const()[name = string("op_2093"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1536, 4096, 1]> squeeze_2_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 4096, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327130624))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330276416))))[name = string("squeeze_2_palettized")];
	string var_2109_pad_type_0 = const()[name = string("op_2109_pad_type_0"), val = string("valid")];
	int32 var_2109_groups_0 = const()[name = string("op_2109_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_2109_strides_0 = const()[name = string("op_2109_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_2109_pad_0 = const()[name = string("op_2109_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_2109_dilations_0 = const()[name = string("op_2109_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 4096, 1]> var_2094 = transpose(perm = var_2093, x = var_2089)[name = string("transpose_66")];
	tensor<fp16, [1, 1536, 1]> var_2109 = conv(dilations = var_2109_dilations_0, groups = var_2109_groups_0, pad = var_2109_pad_0, pad_type = var_2109_pad_type_0, strides = var_2109_strides_0, weight = squeeze_2_palettized, x = var_2094)[name = string("op_2109")];
	tensor<int32, [3]> var_2113 = const()[name = string("op_2113"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2119 = const()[name = string("op_2119"), val = int32(-1)];
	fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_73 = transpose(perm = var_2113, x = var_2109)[name = string("transpose_65")];
	tensor<fp16, [1, 1, 1536]> var_2125_cast_fp16 = mul(x = x_73, y = const_44_promoted_to_fp16)[name = string("op_2125_cast_fp16")];
	bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_71_cast_fp16 = concat(axis = var_2119, interleave = input_71_interleave_0, values = (x_73, var_2125_cast_fp16))[name = string("input_71_cast_fp16")];
	tensor<int32, [1]> normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2117_to_fp16 = const()[name = string("op_2117_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_2117_to_fp16, x = input_71_cast_fp16)[name = string("normed_69_cast_fp16")];
	tensor<int32, [2]> var_2130_split_sizes_0 = const()[name = string("op_2130_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2130_axis_0 = const()[name = string("op_2130_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2130_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2130_cast_fp16_1 = split(axis = var_2130_axis_0, split_sizes = var_2130_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_2130_cast_fp16")];
	tensor<fp16, [1536]> const_45_to_fp16 = const()[name = string("const_45_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330278016)))];
	tensor<fp16, [1, 1, 1536]> var_2133_cast_fp16 = mul(x = var_2130_cast_fp16_0, y = const_45_to_fp16)[name = string("op_2133_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_77_cast_fp16 = add(x = x_59_cast_fp16, y = var_2133_cast_fp16)[name = string("x_77_cast_fp16")];
	int32 var_2140 = const()[name = string("op_2140"), val = int32(-1)];
	fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_2146_cast_fp16 = mul(x = x_77_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_2146_cast_fp16")];
	bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_73_cast_fp16 = concat(axis = var_2140, interleave = input_73_interleave_0, values = (x_77_cast_fp16, var_2146_cast_fp16))[name = string("input_73_cast_fp16")];
	tensor<int32, [1]> normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2138_to_fp16 = const()[name = string("op_2138_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_2138_to_fp16, x = input_73_cast_fp16)[name = string("normed_73_cast_fp16")];
	tensor<int32, [2]> var_2151_split_sizes_0 = const()[name = string("op_2151_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2151_axis_0 = const()[name = string("op_2151_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2151_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2151_cast_fp16_1 = split(axis = var_2151_axis_0, split_sizes = var_2151_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_2151_cast_fp16")];
	tensor<fp16, [1536]> const_47_to_fp16 = const()[name = string("const_47_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330281152)))];
	tensor<fp16, [1, 1, 1536]> var_2154_cast_fp16 = mul(x = var_2151_cast_fp16_0, y = const_47_to_fp16)[name = string("op_2154_cast_fp16")];
	tensor<int32, [3]> var_2167 = const()[name = string("op_2167"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_75_axes_0 = const()[name = string("input_75_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2168 = transpose(perm = var_2167, x = var_2154_cast_fp16)[name = string("transpose_64")];
	tensor<fp16, [1, 1536, 1, 1]> input_75 = expand_dims(axes = input_75_axes_0, x = var_2168)[name = string("input_75")];
	string var_2181_pad_type_0 = const()[name = string("op_2181_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2181_strides_0 = const()[name = string("op_2181_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2181_pad_0 = const()[name = string("op_2181_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2181_dilations_0 = const()[name = string("op_2181_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2181_groups_0 = const()[name = string("op_2181_groups_0"), val = int32(1)];
	tensor<fp16, [1, 6144, 1, 1]> var_2181 = conv(dilations = var_2181_dilations_0, groups = var_2181_groups_0, pad = var_2181_pad_0, pad_type = var_2181_pad_type_0, strides = var_2181_strides_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_75)[name = string("op_2181")];
	string var_2183_mode_0 = const()[name = string("op_2183_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 6144, 1, 1]> var_2183 = gelu(mode = var_2183_mode_0, x = var_2181)[name = string("op_2183")];
	string var_2194_pad_type_0 = const()[name = string("op_2194_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2194_strides_0 = const()[name = string("op_2194_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2194_pad_0 = const()[name = string("op_2194_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2194_dilations_0 = const()[name = string("op_2194_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2194_groups_0 = const()[name = string("op_2194_groups_0"), val = int32(1)];
	tensor<fp16, [1, 6144, 1, 1]> var_2194 = conv(dilations = var_2194_dilations_0, groups = var_2194_groups_0, pad = var_2194_pad_0, pad_type = var_2194_pad_type_0, strides = var_2194_strides_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_75)[name = string("op_2194")];
	tensor<fp16, [1, 6144, 1, 1]> input_77 = mul(x = var_2183, y = var_2194)[name = string("input_77")];
	string var_2206_pad_type_0 = const()[name = string("op_2206_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2206_strides_0 = const()[name = string("op_2206_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2206_pad_0 = const()[name = string("op_2206_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2206_dilations_0 = const()[name = string("op_2206_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2206_groups_0 = const()[name = string("op_2206_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> var_2206 = conv(dilations = var_2206_dilations_0, groups = var_2206_groups_0, pad = var_2206_pad_0, pad_type = var_2206_pad_type_0, strides = var_2206_strides_0, weight = layers_2_mlp_down_proj_weight_palettized, x = input_77)[name = string("op_2206")];
	tensor<int32, [1]> var_2208_axes_0 = const()[name = string("op_2208_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2208 = squeeze(axes = var_2208_axes_0, x = var_2206)[name = string("op_2208")];
	tensor<int32, [3]> var_2212 = const()[name = string("op_2212"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2218 = const()[name = string("op_2218"), val = int32(-1)];
	fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_81 = transpose(perm = var_2212, x = var_2208)[name = string("transpose_63")];
	tensor<fp16, [1, 1, 1536]> var_2224_cast_fp16 = mul(x = x_81, y = const_48_promoted_to_fp16)[name = string("op_2224_cast_fp16")];
	bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_79_cast_fp16 = concat(axis = var_2218, interleave = input_79_interleave_0, values = (x_81, var_2224_cast_fp16))[name = string("input_79_cast_fp16")];
	tensor<int32, [1]> normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2216_to_fp16 = const()[name = string("op_2216_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_2216_to_fp16, x = input_79_cast_fp16)[name = string("normed_77_cast_fp16")];
	tensor<int32, [2]> var_2229_split_sizes_0 = const()[name = string("op_2229_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2229_axis_0 = const()[name = string("op_2229_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2229_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2229_cast_fp16_1 = split(axis = var_2229_axis_0, split_sizes = var_2229_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_2229_cast_fp16")];
	tensor<fp16, [1536]> const_49_to_fp16 = const()[name = string("const_49_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330284288)))];
	tensor<fp16, [1, 1, 1536]> var_2232_cast_fp16 = mul(x = var_2229_cast_fp16_0, y = const_49_to_fp16)[name = string("op_2232_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_41_cast_fp16 = add(x = x_77_cast_fp16, y = var_2232_cast_fp16)[name = string("hidden_states_41_cast_fp16")];
	tensor<fp16, [1, 1, 256]> var_2243 = linear(bias = linear_0_bias_0, weight = layers_2_per_layer_input_gate_weight_palettized, x = hidden_states_41_cast_fp16)[name = string("linear_4")];
	string gated_5_mode_0 = const()[name = string("gated_5_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 1, 256]> gated_5 = gelu(mode = gated_5_mode_0, x = var_2243)[name = string("gated_5")];
	tensor<int32, [3]> var_2260_begin_0 = const()[name = string("op_2260_begin_0"), val = tensor<int32, [3]>([0, 0, 3584])];
	tensor<int32, [3]> var_2260_end_0 = const()[name = string("op_2260_end_0"), val = tensor<int32, [3]>([1, 1, 3840])];
	tensor<bool, [3]> var_2260_end_mask_0 = const()[name = string("op_2260_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> var_2260_cast_fp16 = slice_by_index(begin = var_2260_begin_0, end = var_2260_end_0, end_mask = var_2260_end_mask_0, x = per_layer_combined)[name = string("op_2260_cast_fp16")];
	tensor<fp16, [1, 1, 256]> input_83_cast_fp16 = mul(x = gated_5, y = var_2260_cast_fp16)[name = string("input_83_cast_fp16")];
	tensor<fp16, [1536, 256]> layers_2_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330287424))), lut = tensor<fp16, [48, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330484096))))[name = string("layers_2_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1, 1536]> linear_5_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_83_cast_fp16)[name = string("linear_5_cast_fp16")];
	int32 var_2269 = const()[name = string("op_2269"), val = int32(-1)];
	fp16 const_50_promoted_to_fp16 = const()[name = string("const_50_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_2275_cast_fp16 = mul(x = linear_5_cast_fp16, y = const_50_promoted_to_fp16)[name = string("op_2275_cast_fp16")];
	bool input_85_interleave_0 = const()[name = string("input_85_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_85_cast_fp16 = concat(axis = var_2269, interleave = input_85_interleave_0, values = (linear_5_cast_fp16, var_2275_cast_fp16))[name = string("input_85_cast_fp16")];
	tensor<int32, [1]> normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2267_to_fp16 = const()[name = string("op_2267_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_2267_to_fp16, x = input_85_cast_fp16)[name = string("normed_81_cast_fp16")];
	tensor<int32, [2]> var_2280_split_sizes_0 = const()[name = string("op_2280_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2280_axis_0 = const()[name = string("op_2280_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2280_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2280_cast_fp16_1 = split(axis = var_2280_axis_0, split_sizes = var_2280_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_2280_cast_fp16")];
	tensor<fp16, [1536]> const_51_to_fp16 = const()[name = string("const_51_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330485696)))];
	tensor<fp16, [1, 1, 1536]> var_2283_cast_fp16 = mul(x = var_2280_cast_fp16_0, y = const_51_to_fp16)[name = string("op_2283_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = var_2283_cast_fp16)[name = string("hidden_states_45_cast_fp16")];
	tensor<fp16, [1]> layers_2_layer_scalar_to_fp16 = const()[name = string("layers_2_layer_scalar_to_fp16"), val = tensor<fp16, [1]>([0x1.d4p-6])];
	tensor<fp16, [1, 1, 1536]> x_89_cast_fp16 = mul(x = hidden_states_45_cast_fp16, y = layers_2_layer_scalar_to_fp16)[name = string("x_89_cast_fp16")];
	int32 var_2291 = const()[name = string("op_2291"), val = int32(-1)];
	fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_2297_cast_fp16 = mul(x = x_89_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_2297_cast_fp16")];
	bool input_87_interleave_0 = const()[name = string("input_87_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_87_cast_fp16 = concat(axis = var_2291, interleave = input_87_interleave_0, values = (x_89_cast_fp16, var_2297_cast_fp16))[name = string("input_87_cast_fp16")];
	tensor<int32, [1]> normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2289_to_fp16 = const()[name = string("op_2289_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_2289_to_fp16, x = input_87_cast_fp16)[name = string("normed_85_cast_fp16")];
	tensor<int32, [2]> var_2302_split_sizes_0 = const()[name = string("op_2302_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2302_axis_0 = const()[name = string("op_2302_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2302_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2302_cast_fp16_1 = split(axis = var_2302_axis_0, split_sizes = var_2302_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_2302_cast_fp16")];
	tensor<fp16, [1536]> const_53_to_fp16 = const()[name = string("const_53_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330488832)))];
	tensor<fp16, [1, 1, 1536]> var_2305_cast_fp16 = mul(x = var_2302_cast_fp16_0, y = const_53_to_fp16)[name = string("op_2305_cast_fp16")];
	tensor<int32, [3]> var_2313 = const()[name = string("op_2313"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2316_axes_0 = const()[name = string("op_2316_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2314_cast_fp16 = transpose(perm = var_2313, x = var_2305_cast_fp16)[name = string("transpose_62")];
	tensor<fp16, [1, 1536, 1, 1]> var_2316_cast_fp16 = expand_dims(axes = var_2316_axes_0, x = var_2314_cast_fp16)[name = string("op_2316_cast_fp16")];
	string var_2332_pad_type_0 = const()[name = string("op_2332_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2332_strides_0 = const()[name = string("op_2332_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2332_pad_0 = const()[name = string("op_2332_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2332_dilations_0 = const()[name = string("op_2332_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2332_groups_0 = const()[name = string("op_2332_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_2332 = conv(dilations = var_2332_dilations_0, groups = var_2332_groups_0, pad = var_2332_pad_0, pad_type = var_2332_pad_type_0, strides = var_2332_strides_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = var_2316_cast_fp16)[name = string("op_2332")];
	tensor<int32, [4]> var_2337 = const()[name = string("op_2337"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_2338 = reshape(shape = var_2337, x = var_2332)[name = string("op_2338")];
	tensor<int32, [4]> var_2343 = const()[name = string("op_2343"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_2353 = const()[name = string("op_2353"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_2344 = transpose(perm = var_2343, x = var_2338)[name = string("transpose_61")];
	tensor<fp16, [1, 8, 256]> x_93 = reshape(shape = var_2353, x = var_2344)[name = string("x_93")];
	int32 var_2359 = const()[name = string("op_2359"), val = int32(-1)];
	fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_2365_cast_fp16 = mul(x = x_93, y = const_54_promoted_to_fp16)[name = string("op_2365_cast_fp16")];
	bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_91_cast_fp16 = concat(axis = var_2359, interleave = input_91_interleave_0, values = (x_93, var_2365_cast_fp16))[name = string("input_91_cast_fp16")];
	tensor<int32, [1]> normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2357_to_fp16 = const()[name = string("op_2357_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_2357_to_fp16, x = input_91_cast_fp16)[name = string("normed_89_cast_fp16")];
	tensor<int32, [2]> var_2370_split_sizes_0 = const()[name = string("op_2370_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_2370_axis_0 = const()[name = string("op_2370_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_2370_cast_fp16_0, tensor<fp16, [1, 8, 256]> var_2370_cast_fp16_1 = split(axis = var_2370_axis_0, split_sizes = var_2370_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_2370_cast_fp16")];
	tensor<fp16, [1, 8, 256]> var_2373_cast_fp16 = mul(x = var_2370_cast_fp16_0, y = const_21_to_fp16)[name = string("op_2373_cast_fp16")];
	tensor<int32, [4]> var_2379 = const()[name = string("op_2379"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_27 = reshape(shape = var_2379, x = var_2373_cast_fp16)[name = string("q_27")];
	tensor<fp16, [1, 8, 1, 256]> var_2381_cast_fp16 = mul(x = q_27, y = cos_s)[name = string("op_2381_cast_fp16")];
	tensor<int32, [2]> var_2382_split_sizes_0 = const()[name = string("op_2382_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_2382_axis_0 = const()[name = string("op_2382_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_2382_0, tensor<fp16, [1, 8, 1, 128]> var_2382_1 = split(axis = var_2382_axis_0, split_sizes = var_2382_split_sizes_0, x = q_27)[name = string("op_2382")];
	fp16 const_56_promoted = const()[name = string("const_56_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_2384 = mul(x = var_2382_1, y = const_56_promoted)[name = string("op_2384")];
	int32 var_2386 = const()[name = string("op_2386"), val = int32(-1)];
	bool var_2387_interleave_0 = const()[name = string("op_2387_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_2387 = concat(axis = var_2386, interleave = var_2387_interleave_0, values = (var_2384, var_2382_0))[name = string("op_2387")];
	tensor<fp16, [1, 8, 1, 256]> var_2388_cast_fp16 = mul(x = var_2387, y = sin_s)[name = string("op_2388_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_29_cast_fp16 = add(x = var_2381_cast_fp16, y = var_2388_cast_fp16)[name = string("q_29_cast_fp16")];
	bool var_2412_transpose_x_0 = const()[name = string("op_2412_transpose_x_0"), val = bool(false)];
	bool var_2412_transpose_y_0 = const()[name = string("op_2412_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> var_2412_cast_fp16 = matmul(transpose_x = var_2412_transpose_x_0, transpose_y = var_2412_transpose_y_0, x = q_29_cast_fp16, y = transpose_49_cast_fp16)[name = string("op_2412_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> var_2419_cast_fp16 = add(x = var_2412_cast_fp16, y = causal_mask)[name = string("op_2419_cast_fp16")];
	int32 var_2420 = const()[name = string("op_2420"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 512]> var_2422_cast_fp16 = softmax(axis = var_2420, x = var_2419_cast_fp16)[name = string("op_2422_cast_fp16")];
	bool var_2438_transpose_x_0 = const()[name = string("op_2438_transpose_x_0"), val = bool(false)];
	bool var_2438_transpose_y_0 = const()[name = string("op_2438_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_2438_cast_fp16 = matmul(transpose_x = var_2438_transpose_x_0, transpose_y = var_2438_transpose_y_0, x = var_2422_cast_fp16, y = Ve_3_cast_fp16)[name = string("op_2438_cast_fp16")];
	tensor<int32, [4]> var_2448 = const()[name = string("op_2448"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_2455 = const()[name = string("op_2455"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_2449 = transpose(perm = var_2448, x = var_2438_cast_fp16)[name = string("transpose_60")];
	tensor<fp16, [1, 1, 2048]> var_2456 = reshape(shape = var_2455, x = var_2449)[name = string("op_2456")];
	tensor<int32, [3]> var_2460 = const()[name = string("op_2460"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_3_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330491968))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332064896))))[name = string("squeeze_3_palettized")];
	string var_2476_pad_type_0 = const()[name = string("op_2476_pad_type_0"), val = string("valid")];
	int32 var_2476_groups_0 = const()[name = string("op_2476_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_2476_strides_0 = const()[name = string("op_2476_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_2476_pad_0 = const()[name = string("op_2476_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_2476_dilations_0 = const()[name = string("op_2476_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 2048, 1]> var_2461 = transpose(perm = var_2460, x = var_2456)[name = string("transpose_59")];
	tensor<fp16, [1, 1536, 1]> var_2476 = conv(dilations = var_2476_dilations_0, groups = var_2476_groups_0, pad = var_2476_pad_0, pad_type = var_2476_pad_type_0, strides = var_2476_strides_0, weight = squeeze_3_palettized, x = var_2461)[name = string("op_2476")];
	tensor<int32, [3]> var_2480 = const()[name = string("op_2480"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2486 = const()[name = string("op_2486"), val = int32(-1)];
	fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_97 = transpose(perm = var_2480, x = var_2476)[name = string("transpose_58")];
	tensor<fp16, [1, 1, 1536]> var_2492_cast_fp16 = mul(x = x_97, y = const_57_promoted_to_fp16)[name = string("op_2492_cast_fp16")];
	bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_95_cast_fp16 = concat(axis = var_2486, interleave = input_95_interleave_0, values = (x_97, var_2492_cast_fp16))[name = string("input_95_cast_fp16")];
	tensor<int32, [1]> normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2484_to_fp16 = const()[name = string("op_2484_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_2484_to_fp16, x = input_95_cast_fp16)[name = string("normed_93_cast_fp16")];
	tensor<int32, [2]> var_2497_split_sizes_0 = const()[name = string("op_2497_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2497_axis_0 = const()[name = string("op_2497_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2497_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2497_cast_fp16_1 = split(axis = var_2497_axis_0, split_sizes = var_2497_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_2497_cast_fp16")];
	tensor<fp16, [1536]> const_58_to_fp16 = const()[name = string("const_58_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332066496)))];
	tensor<fp16, [1, 1, 1536]> var_2500_cast_fp16 = mul(x = var_2497_cast_fp16_0, y = const_58_to_fp16)[name = string("op_2500_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_101_cast_fp16 = add(x = x_89_cast_fp16, y = var_2500_cast_fp16)[name = string("x_101_cast_fp16")];
	int32 var_2507 = const()[name = string("op_2507"), val = int32(-1)];
	fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_2513_cast_fp16 = mul(x = x_101_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_2513_cast_fp16")];
	bool input_97_interleave_0 = const()[name = string("input_97_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_97_cast_fp16 = concat(axis = var_2507, interleave = input_97_interleave_0, values = (x_101_cast_fp16, var_2513_cast_fp16))[name = string("input_97_cast_fp16")];
	tensor<int32, [1]> normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2505_to_fp16 = const()[name = string("op_2505_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_2505_to_fp16, x = input_97_cast_fp16)[name = string("normed_97_cast_fp16")];
	tensor<int32, [2]> var_2518_split_sizes_0 = const()[name = string("op_2518_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2518_axis_0 = const()[name = string("op_2518_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2518_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2518_cast_fp16_1 = split(axis = var_2518_axis_0, split_sizes = var_2518_split_sizes_0, x = normed_97_cast_fp16)[name = string("op_2518_cast_fp16")];
	tensor<fp16, [1536]> const_60_to_fp16 = const()[name = string("const_60_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332069632)))];
	tensor<fp16, [1, 1, 1536]> var_2521_cast_fp16 = mul(x = var_2518_cast_fp16_0, y = const_60_to_fp16)[name = string("op_2521_cast_fp16")];
	tensor<int32, [3]> var_2534 = const()[name = string("op_2534"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_99_axes_0 = const()[name = string("input_99_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2535 = transpose(perm = var_2534, x = var_2521_cast_fp16)[name = string("transpose_57")];
	tensor<fp16, [1, 1536, 1, 1]> input_99 = expand_dims(axes = input_99_axes_0, x = var_2535)[name = string("input_99")];
	string var_2548_pad_type_0 = const()[name = string("op_2548_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2548_strides_0 = const()[name = string("op_2548_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2548_pad_0 = const()[name = string("op_2548_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2548_dilations_0 = const()[name = string("op_2548_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2548_groups_0 = const()[name = string("op_2548_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_2548 = conv(dilations = var_2548_dilations_0, groups = var_2548_groups_0, pad = var_2548_pad_0, pad_type = var_2548_pad_type_0, strides = var_2548_strides_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_99)[name = string("op_2548")];
	string var_2550_mode_0 = const()[name = string("op_2550_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> var_2550 = gelu(mode = var_2550_mode_0, x = var_2548)[name = string("op_2550")];
	string var_2561_pad_type_0 = const()[name = string("op_2561_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2561_strides_0 = const()[name = string("op_2561_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2561_pad_0 = const()[name = string("op_2561_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2561_dilations_0 = const()[name = string("op_2561_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2561_groups_0 = const()[name = string("op_2561_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_2561 = conv(dilations = var_2561_dilations_0, groups = var_2561_groups_0, pad = var_2561_pad_0, pad_type = var_2561_pad_type_0, strides = var_2561_strides_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_99)[name = string("op_2561")];
	tensor<fp16, [1, 12288, 1, 1]> input_101 = mul(x = var_2550, y = var_2561)[name = string("input_101")];
	string var_2573_pad_type_0 = const()[name = string("op_2573_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2573_strides_0 = const()[name = string("op_2573_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2573_pad_0 = const()[name = string("op_2573_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2573_dilations_0 = const()[name = string("op_2573_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2573_groups_0 = const()[name = string("op_2573_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> var_2573 = conv(dilations = var_2573_dilations_0, groups = var_2573_groups_0, pad = var_2573_pad_0, pad_type = var_2573_pad_type_0, strides = var_2573_strides_0, weight = layers_3_mlp_down_proj_weight_palettized, x = input_101)[name = string("op_2573")];
	tensor<int32, [1]> var_2575_axes_0 = const()[name = string("op_2575_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2575 = squeeze(axes = var_2575_axes_0, x = var_2573)[name = string("op_2575")];
	tensor<int32, [3]> var_2579 = const()[name = string("op_2579"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2585 = const()[name = string("op_2585"), val = int32(-1)];
	fp16 const_61_promoted_to_fp16 = const()[name = string("const_61_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_105 = transpose(perm = var_2579, x = var_2575)[name = string("transpose_56")];
	tensor<fp16, [1, 1, 1536]> var_2591_cast_fp16 = mul(x = x_105, y = const_61_promoted_to_fp16)[name = string("op_2591_cast_fp16")];
	bool input_103_interleave_0 = const()[name = string("input_103_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_103_cast_fp16 = concat(axis = var_2585, interleave = input_103_interleave_0, values = (x_105, var_2591_cast_fp16))[name = string("input_103_cast_fp16")];
	tensor<int32, [1]> normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2583_to_fp16 = const()[name = string("op_2583_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_2583_to_fp16, x = input_103_cast_fp16)[name = string("normed_101_cast_fp16")];
	tensor<int32, [2]> var_2596_split_sizes_0 = const()[name = string("op_2596_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2596_axis_0 = const()[name = string("op_2596_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2596_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2596_cast_fp16_1 = split(axis = var_2596_axis_0, split_sizes = var_2596_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_2596_cast_fp16")];
	tensor<fp16, [1536]> const_62_to_fp16 = const()[name = string("const_62_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332072768)))];
	tensor<fp16, [1, 1, 1536]> var_2599_cast_fp16 = mul(x = var_2596_cast_fp16_0, y = const_62_to_fp16)[name = string("op_2599_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_55_cast_fp16 = add(x = x_101_cast_fp16, y = var_2599_cast_fp16)[name = string("hidden_states_55_cast_fp16")];
	tensor<fp16, [1, 1, 256]> var_2610 = linear(bias = linear_0_bias_0, weight = layers_3_per_layer_input_gate_weight_palettized, x = hidden_states_55_cast_fp16)[name = string("linear_6")];
	string gated_7_mode_0 = const()[name = string("gated_7_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 1, 256]> gated_7 = gelu(mode = gated_7_mode_0, x = var_2610)[name = string("gated_7")];
	tensor<int32, [3]> var_2627_begin_0 = const()[name = string("op_2627_begin_0"), val = tensor<int32, [3]>([0, 0, 3840])];
	tensor<int32, [3]> var_2627_end_0 = const()[name = string("op_2627_end_0"), val = tensor<int32, [3]>([1, 1, 4096])];
	tensor<bool, [3]> var_2627_end_mask_0 = const()[name = string("op_2627_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> var_2627_cast_fp16 = slice_by_index(begin = var_2627_begin_0, end = var_2627_end_0, end_mask = var_2627_end_mask_0, x = per_layer_combined)[name = string("op_2627_cast_fp16")];
	tensor<fp16, [1, 1, 256]> input_107_cast_fp16 = mul(x = gated_7, y = var_2627_cast_fp16)[name = string("input_107_cast_fp16")];
	tensor<fp16, [1536, 256]> layers_3_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332075904))), lut = tensor<fp16, [48, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332272576))))[name = string("layers_3_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1, 1536]> linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_107_cast_fp16)[name = string("linear_7_cast_fp16")];
	int32 var_2636 = const()[name = string("op_2636"), val = int32(-1)];
	fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_2642_cast_fp16 = mul(x = linear_7_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_2642_cast_fp16")];
	bool input_109_interleave_0 = const()[name = string("input_109_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_109_cast_fp16 = concat(axis = var_2636, interleave = input_109_interleave_0, values = (linear_7_cast_fp16, var_2642_cast_fp16))[name = string("input_109_cast_fp16")];
	tensor<int32, [1]> normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2634_to_fp16 = const()[name = string("op_2634_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_2634_to_fp16, x = input_109_cast_fp16)[name = string("normed_105_cast_fp16")];
	tensor<int32, [2]> var_2647_split_sizes_0 = const()[name = string("op_2647_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2647_axis_0 = const()[name = string("op_2647_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2647_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2647_cast_fp16_1 = split(axis = var_2647_axis_0, split_sizes = var_2647_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_2647_cast_fp16")];
	tensor<fp16, [1536]> const_64_to_fp16 = const()[name = string("const_64_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332274176)))];
	tensor<fp16, [1, 1, 1536]> var_2650_cast_fp16 = mul(x = var_2647_cast_fp16_0, y = const_64_to_fp16)[name = string("op_2650_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_59_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = var_2650_cast_fp16)[name = string("hidden_states_59_cast_fp16")];
	tensor<fp16, [1]> layers_3_layer_scalar_to_fp16 = const()[name = string("layers_3_layer_scalar_to_fp16"), val = tensor<fp16, [1]>([0x1.04p-2])];
	tensor<fp16, [1, 1, 1536]> x_113_cast_fp16 = mul(x = hidden_states_59_cast_fp16, y = layers_3_layer_scalar_to_fp16)[name = string("x_113_cast_fp16")];
	int32 var_2658 = const()[name = string("op_2658"), val = int32(-1)];
	fp16 const_65_promoted_to_fp16 = const()[name = string("const_65_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_2664_cast_fp16 = mul(x = x_113_cast_fp16, y = const_65_promoted_to_fp16)[name = string("op_2664_cast_fp16")];
	bool input_111_interleave_0 = const()[name = string("input_111_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_111_cast_fp16 = concat(axis = var_2658, interleave = input_111_interleave_0, values = (x_113_cast_fp16, var_2664_cast_fp16))[name = string("input_111_cast_fp16")];
	tensor<int32, [1]> normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2656_to_fp16 = const()[name = string("op_2656_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_2656_to_fp16, x = input_111_cast_fp16)[name = string("normed_109_cast_fp16")];
	tensor<int32, [2]> var_2669_split_sizes_0 = const()[name = string("op_2669_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2669_axis_0 = const()[name = string("op_2669_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2669_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2669_cast_fp16_1 = split(axis = var_2669_axis_0, split_sizes = var_2669_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_2669_cast_fp16")];
	tensor<fp16, [1536]> const_66_to_fp16 = const()[name = string("const_66_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332277312)))];
	tensor<fp16, [1, 1, 1536]> var_2672_cast_fp16 = mul(x = var_2669_cast_fp16_0, y = const_66_to_fp16)[name = string("op_2672_cast_fp16")];
	tensor<int32, [3]> var_2680 = const()[name = string("op_2680"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_2683_axes_0 = const()[name = string("op_2683_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2681_cast_fp16 = transpose(perm = var_2680, x = var_2672_cast_fp16)[name = string("transpose_55")];
	tensor<fp16, [1, 1536, 1, 1]> var_2683_cast_fp16 = expand_dims(axes = var_2683_axes_0, x = var_2681_cast_fp16)[name = string("op_2683_cast_fp16")];
	string var_2699_pad_type_0 = const()[name = string("op_2699_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2699_strides_0 = const()[name = string("op_2699_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2699_pad_0 = const()[name = string("op_2699_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2699_dilations_0 = const()[name = string("op_2699_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2699_groups_0 = const()[name = string("op_2699_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_2699 = conv(dilations = var_2699_dilations_0, groups = var_2699_groups_0, pad = var_2699_pad_0, pad_type = var_2699_pad_type_0, strides = var_2699_strides_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = var_2683_cast_fp16)[name = string("op_2699")];
	tensor<int32, [4]> var_2704 = const()[name = string("op_2704"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_2705 = reshape(shape = var_2704, x = var_2699)[name = string("op_2705")];
	tensor<int32, [4]> var_2710 = const()[name = string("op_2710"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_2720 = const()[name = string("op_2720"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_2711 = transpose(perm = var_2710, x = var_2705)[name = string("transpose_54")];
	tensor<fp16, [1, 8, 256]> x_117 = reshape(shape = var_2720, x = var_2711)[name = string("x_117")];
	int32 var_2726 = const()[name = string("op_2726"), val = int32(-1)];
	fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_2732_cast_fp16 = mul(x = x_117, y = const_67_promoted_to_fp16)[name = string("op_2732_cast_fp16")];
	bool input_115_interleave_0 = const()[name = string("input_115_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_115_cast_fp16 = concat(axis = var_2726, interleave = input_115_interleave_0, values = (x_117, var_2732_cast_fp16))[name = string("input_115_cast_fp16")];
	tensor<int32, [1]> normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2724_to_fp16 = const()[name = string("op_2724_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_2724_to_fp16, x = input_115_cast_fp16)[name = string("normed_113_cast_fp16")];
	tensor<int32, [2]> var_2737_split_sizes_0 = const()[name = string("op_2737_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_2737_axis_0 = const()[name = string("op_2737_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_2737_cast_fp16_0, tensor<fp16, [1, 8, 256]> var_2737_cast_fp16_1 = split(axis = var_2737_axis_0, split_sizes = var_2737_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_2737_cast_fp16")];
	tensor<fp16, [1, 8, 256]> var_2740_cast_fp16 = mul(x = var_2737_cast_fp16_0, y = const_21_to_fp16)[name = string("op_2740_cast_fp16")];
	tensor<int32, [4]> var_2746 = const()[name = string("op_2746"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_33 = reshape(shape = var_2746, x = var_2740_cast_fp16)[name = string("q_33")];
	tensor<fp16, [1, 8, 1, 256]> var_2748_cast_fp16 = mul(x = q_33, y = cos_s)[name = string("op_2748_cast_fp16")];
	tensor<int32, [2]> var_2749_split_sizes_0 = const()[name = string("op_2749_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_2749_axis_0 = const()[name = string("op_2749_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_2749_0, tensor<fp16, [1, 8, 1, 128]> var_2749_1 = split(axis = var_2749_axis_0, split_sizes = var_2749_split_sizes_0, x = q_33)[name = string("op_2749")];
	fp16 const_69_promoted = const()[name = string("const_69_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_2751 = mul(x = var_2749_1, y = const_69_promoted)[name = string("op_2751")];
	int32 var_2753 = const()[name = string("op_2753"), val = int32(-1)];
	bool var_2754_interleave_0 = const()[name = string("op_2754_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_2754 = concat(axis = var_2753, interleave = var_2754_interleave_0, values = (var_2751, var_2749_0))[name = string("op_2754")];
	tensor<fp16, [1, 8, 1, 256]> var_2755_cast_fp16 = mul(x = var_2754, y = sin_s)[name = string("op_2755_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_35_cast_fp16 = add(x = var_2748_cast_fp16, y = var_2755_cast_fp16)[name = string("q_35_cast_fp16")];
	bool var_2779_transpose_x_0 = const()[name = string("op_2779_transpose_x_0"), val = bool(false)];
	bool var_2779_transpose_y_0 = const()[name = string("op_2779_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> var_2779_cast_fp16 = matmul(transpose_x = var_2779_transpose_x_0, transpose_y = var_2779_transpose_y_0, x = q_35_cast_fp16, y = transpose_49_cast_fp16)[name = string("op_2779_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> var_2786_cast_fp16 = add(x = var_2779_cast_fp16, y = causal_mask)[name = string("op_2786_cast_fp16")];
	int32 var_2787 = const()[name = string("op_2787"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 512]> var_2789_cast_fp16 = softmax(axis = var_2787, x = var_2786_cast_fp16)[name = string("op_2789_cast_fp16")];
	bool var_2805_transpose_x_0 = const()[name = string("op_2805_transpose_x_0"), val = bool(false)];
	bool var_2805_transpose_y_0 = const()[name = string("op_2805_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_2805_cast_fp16 = matmul(transpose_x = var_2805_transpose_x_0, transpose_y = var_2805_transpose_y_0, x = var_2789_cast_fp16, y = Ve_3_cast_fp16)[name = string("op_2805_cast_fp16")];
	tensor<int32, [4]> var_2815 = const()[name = string("op_2815"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_2822 = const()[name = string("op_2822"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_2816 = transpose(perm = var_2815, x = var_2805_cast_fp16)[name = string("transpose_53")];
	tensor<fp16, [1, 1, 2048]> var_2823 = reshape(shape = var_2822, x = var_2816)[name = string("op_2823")];
	tensor<int32, [3]> var_2827 = const()[name = string("op_2827"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_4_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332280448))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333853376))))[name = string("squeeze_4_palettized")];
	string var_2843_pad_type_0 = const()[name = string("op_2843_pad_type_0"), val = string("valid")];
	int32 var_2843_groups_0 = const()[name = string("op_2843_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_2843_strides_0 = const()[name = string("op_2843_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_2843_pad_0 = const()[name = string("op_2843_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_2843_dilations_0 = const()[name = string("op_2843_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 2048, 1]> var_2828 = transpose(perm = var_2827, x = var_2823)[name = string("transpose_52")];
	tensor<fp16, [1, 1536, 1]> var_2843 = conv(dilations = var_2843_dilations_0, groups = var_2843_groups_0, pad = var_2843_pad_0, pad_type = var_2843_pad_type_0, strides = var_2843_strides_0, weight = squeeze_4_palettized, x = var_2828)[name = string("op_2843")];
	tensor<int32, [3]> var_2847 = const()[name = string("op_2847"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2853 = const()[name = string("op_2853"), val = int32(-1)];
	fp16 const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_121 = transpose(perm = var_2847, x = var_2843)[name = string("transpose_51")];
	tensor<fp16, [1, 1, 1536]> var_2859_cast_fp16 = mul(x = x_121, y = const_70_promoted_to_fp16)[name = string("op_2859_cast_fp16")];
	bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_119_cast_fp16 = concat(axis = var_2853, interleave = input_119_interleave_0, values = (x_121, var_2859_cast_fp16))[name = string("input_119_cast_fp16")];
	tensor<int32, [1]> normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2851_to_fp16 = const()[name = string("op_2851_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_2851_to_fp16, x = input_119_cast_fp16)[name = string("normed_117_cast_fp16")];
	tensor<int32, [2]> var_2864_split_sizes_0 = const()[name = string("op_2864_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2864_axis_0 = const()[name = string("op_2864_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2864_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2864_cast_fp16_1 = split(axis = var_2864_axis_0, split_sizes = var_2864_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_2864_cast_fp16")];
	tensor<fp16, [1536]> const_71_to_fp16 = const()[name = string("const_71_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333854976)))];
	tensor<fp16, [1, 1, 1536]> var_2867_cast_fp16 = mul(x = var_2864_cast_fp16_0, y = const_71_to_fp16)[name = string("op_2867_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_125_cast_fp16 = add(x = x_113_cast_fp16, y = var_2867_cast_fp16)[name = string("x_125_cast_fp16")];
	int32 var_2874 = const()[name = string("op_2874"), val = int32(-1)];
	fp16 const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_2880_cast_fp16 = mul(x = x_125_cast_fp16, y = const_72_promoted_to_fp16)[name = string("op_2880_cast_fp16")];
	bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_121_cast_fp16 = concat(axis = var_2874, interleave = input_121_interleave_0, values = (x_125_cast_fp16, var_2880_cast_fp16))[name = string("input_121_cast_fp16")];
	tensor<int32, [1]> normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2872_to_fp16 = const()[name = string("op_2872_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_2872_to_fp16, x = input_121_cast_fp16)[name = string("normed_121_cast_fp16")];
	tensor<int32, [2]> var_2885_split_sizes_0 = const()[name = string("op_2885_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2885_axis_0 = const()[name = string("op_2885_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2885_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2885_cast_fp16_1 = split(axis = var_2885_axis_0, split_sizes = var_2885_split_sizes_0, x = normed_121_cast_fp16)[name = string("op_2885_cast_fp16")];
	tensor<fp16, [1536]> const_73_to_fp16 = const()[name = string("const_73_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333858112)))];
	tensor<fp16, [1, 1, 1536]> var_2888_cast_fp16 = mul(x = var_2885_cast_fp16_0, y = const_73_to_fp16)[name = string("op_2888_cast_fp16")];
	tensor<int32, [3]> var_2901 = const()[name = string("op_2901"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_123_axes_0 = const()[name = string("input_123_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2902 = transpose(perm = var_2901, x = var_2888_cast_fp16)[name = string("transpose_50")];
	tensor<fp16, [1, 1536, 1, 1]> input_123 = expand_dims(axes = input_123_axes_0, x = var_2902)[name = string("input_123")];
	string var_2915_pad_type_0 = const()[name = string("op_2915_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2915_strides_0 = const()[name = string("op_2915_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2915_pad_0 = const()[name = string("op_2915_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2915_dilations_0 = const()[name = string("op_2915_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2915_groups_0 = const()[name = string("op_2915_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_2915 = conv(dilations = var_2915_dilations_0, groups = var_2915_groups_0, pad = var_2915_pad_0, pad_type = var_2915_pad_type_0, strides = var_2915_strides_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_123)[name = string("op_2915")];
	string var_2917_mode_0 = const()[name = string("op_2917_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> var_2917 = gelu(mode = var_2917_mode_0, x = var_2915)[name = string("op_2917")];
	string var_2928_pad_type_0 = const()[name = string("op_2928_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2928_strides_0 = const()[name = string("op_2928_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2928_pad_0 = const()[name = string("op_2928_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2928_dilations_0 = const()[name = string("op_2928_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2928_groups_0 = const()[name = string("op_2928_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_2928 = conv(dilations = var_2928_dilations_0, groups = var_2928_groups_0, pad = var_2928_pad_0, pad_type = var_2928_pad_type_0, strides = var_2928_strides_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_123)[name = string("op_2928")];
	tensor<fp16, [1, 12288, 1, 1]> input_125 = mul(x = var_2917, y = var_2928)[name = string("input_125")];
	string var_2940_pad_type_0 = const()[name = string("op_2940_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_2940_strides_0 = const()[name = string("op_2940_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_2940_pad_0 = const()[name = string("op_2940_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_2940_dilations_0 = const()[name = string("op_2940_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_2940_groups_0 = const()[name = string("op_2940_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> var_2940 = conv(dilations = var_2940_dilations_0, groups = var_2940_groups_0, pad = var_2940_pad_0, pad_type = var_2940_pad_type_0, strides = var_2940_strides_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_125)[name = string("op_2940")];
	tensor<int32, [1]> var_2942_axes_0 = const()[name = string("op_2942_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_2942 = squeeze(axes = var_2942_axes_0, x = var_2940)[name = string("op_2942")];
	tensor<int32, [3]> var_2946 = const()[name = string("op_2946"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_2952 = const()[name = string("op_2952"), val = int32(-1)];
	fp16 const_74_promoted_to_fp16 = const()[name = string("const_74_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_129 = transpose(perm = var_2946, x = var_2942)[name = string("transpose_49")];
	tensor<fp16, [1, 1, 1536]> var_2958_cast_fp16 = mul(x = x_129, y = const_74_promoted_to_fp16)[name = string("op_2958_cast_fp16")];
	bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_127_cast_fp16 = concat(axis = var_2952, interleave = input_127_interleave_0, values = (x_129, var_2958_cast_fp16))[name = string("input_127_cast_fp16")];
	tensor<int32, [1]> normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_2950_to_fp16 = const()[name = string("op_2950_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_2950_to_fp16, x = input_127_cast_fp16)[name = string("normed_125_cast_fp16")];
	tensor<int32, [2]> var_2963_split_sizes_0 = const()[name = string("op_2963_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_2963_axis_0 = const()[name = string("op_2963_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_2963_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2963_cast_fp16_1 = split(axis = var_2963_axis_0, split_sizes = var_2963_split_sizes_0, x = normed_125_cast_fp16)[name = string("op_2963_cast_fp16")];
	tensor<fp16, [1536]> const_75_to_fp16 = const()[name = string("const_75_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333861248)))];
	tensor<fp16, [1, 1, 1536]> var_2966_cast_fp16 = mul(x = var_2963_cast_fp16_0, y = const_75_to_fp16)[name = string("op_2966_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_69_cast_fp16 = add(x = x_125_cast_fp16, y = var_2966_cast_fp16)[name = string("hidden_states_69_cast_fp16")];
	tensor<fp16, [1, 1, 256]> var_2977 = linear(bias = linear_0_bias_0, weight = layers_4_per_layer_input_gate_weight_palettized, x = hidden_states_69_cast_fp16)[name = string("linear_8")];
	string gated_9_mode_0 = const()[name = string("gated_9_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 1, 256]> gated_9 = gelu(mode = gated_9_mode_0, x = var_2977)[name = string("gated_9")];
	tensor<int32, [3]> var_2994_begin_0 = const()[name = string("op_2994_begin_0"), val = tensor<int32, [3]>([0, 0, 4096])];
	tensor<int32, [3]> var_2994_end_0 = const()[name = string("op_2994_end_0"), val = tensor<int32, [3]>([1, 1, 4352])];
	tensor<bool, [3]> var_2994_end_mask_0 = const()[name = string("op_2994_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> var_2994_cast_fp16 = slice_by_index(begin = var_2994_begin_0, end = var_2994_end_0, end_mask = var_2994_end_mask_0, x = per_layer_combined)[name = string("op_2994_cast_fp16")];
	tensor<fp16, [1, 1, 256]> input_131_cast_fp16 = mul(x = gated_9, y = var_2994_cast_fp16)[name = string("input_131_cast_fp16")];
	tensor<fp16, [1536, 256]> layers_4_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333864384))), lut = tensor<fp16, [48, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334061056))))[name = string("layers_4_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1, 1536]> linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_131_cast_fp16)[name = string("linear_9_cast_fp16")];
	int32 var_3003 = const()[name = string("op_3003"), val = int32(-1)];
	fp16 const_76_promoted_to_fp16 = const()[name = string("const_76_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_3009_cast_fp16 = mul(x = linear_9_cast_fp16, y = const_76_promoted_to_fp16)[name = string("op_3009_cast_fp16")];
	bool input_133_interleave_0 = const()[name = string("input_133_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_133_cast_fp16 = concat(axis = var_3003, interleave = input_133_interleave_0, values = (linear_9_cast_fp16, var_3009_cast_fp16))[name = string("input_133_cast_fp16")];
	tensor<int32, [1]> normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3001_to_fp16 = const()[name = string("op_3001_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_3001_to_fp16, x = input_133_cast_fp16)[name = string("normed_129_cast_fp16")];
	tensor<int32, [2]> var_3014_split_sizes_0 = const()[name = string("op_3014_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3014_axis_0 = const()[name = string("op_3014_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3014_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3014_cast_fp16_1 = split(axis = var_3014_axis_0, split_sizes = var_3014_split_sizes_0, x = normed_129_cast_fp16)[name = string("op_3014_cast_fp16")];
	tensor<fp16, [1536]> const_77_to_fp16 = const()[name = string("const_77_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334062656)))];
	tensor<fp16, [1, 1, 1536]> var_3017_cast_fp16 = mul(x = var_3014_cast_fp16_0, y = const_77_to_fp16)[name = string("op_3017_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_73_cast_fp16 = add(x = hidden_states_69_cast_fp16, y = var_3017_cast_fp16)[name = string("hidden_states_73_cast_fp16")];
	tensor<fp16, [1]> layers_4_layer_scalar_to_fp16 = const()[name = string("layers_4_layer_scalar_to_fp16"), val = tensor<fp16, [1]>([0x1.2cp-1])];
	tensor<fp16, [1, 1, 1536]> x_137_cast_fp16 = mul(x = hidden_states_73_cast_fp16, y = layers_4_layer_scalar_to_fp16)[name = string("x_137_cast_fp16")];
	int32 var_3025 = const()[name = string("op_3025"), val = int32(-1)];
	fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_3031_cast_fp16 = mul(x = x_137_cast_fp16, y = const_78_promoted_to_fp16)[name = string("op_3031_cast_fp16")];
	bool input_135_interleave_0 = const()[name = string("input_135_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_135_cast_fp16 = concat(axis = var_3025, interleave = input_135_interleave_0, values = (x_137_cast_fp16, var_3031_cast_fp16))[name = string("input_135_cast_fp16")];
	tensor<int32, [1]> normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3023_to_fp16 = const()[name = string("op_3023_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_3023_to_fp16, x = input_135_cast_fp16)[name = string("normed_133_cast_fp16")];
	tensor<int32, [2]> var_3036_split_sizes_0 = const()[name = string("op_3036_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3036_axis_0 = const()[name = string("op_3036_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3036_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3036_cast_fp16_1 = split(axis = var_3036_axis_0, split_sizes = var_3036_split_sizes_0, x = normed_133_cast_fp16)[name = string("op_3036_cast_fp16")];
	tensor<fp16, [1536]> const_79_to_fp16 = const()[name = string("const_79_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334065792)))];
	tensor<fp16, [1, 1, 1536]> var_3039_cast_fp16 = mul(x = var_3036_cast_fp16_0, y = const_79_to_fp16)[name = string("op_3039_cast_fp16")];
	tensor<int32, [3]> var_3047 = const()[name = string("op_3047"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_3050_axes_0 = const()[name = string("op_3050_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3048_cast_fp16 = transpose(perm = var_3047, x = var_3039_cast_fp16)[name = string("transpose_48")];
	tensor<fp16, [1, 1536, 1, 1]> var_3050_cast_fp16 = expand_dims(axes = var_3050_axes_0, x = var_3048_cast_fp16)[name = string("op_3050_cast_fp16")];
	string var_3066_pad_type_0 = const()[name = string("op_3066_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3066_strides_0 = const()[name = string("op_3066_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3066_pad_0 = const()[name = string("op_3066_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3066_dilations_0 = const()[name = string("op_3066_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3066_groups_0 = const()[name = string("op_3066_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_3066 = conv(dilations = var_3066_dilations_0, groups = var_3066_groups_0, pad = var_3066_pad_0, pad_type = var_3066_pad_type_0, strides = var_3066_strides_0, weight = layers_5_self_attn_q_proj_weight_palettized, x = var_3050_cast_fp16)[name = string("op_3066")];
	tensor<int32, [4]> var_3071 = const()[name = string("op_3071"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_3072 = reshape(shape = var_3071, x = var_3066)[name = string("op_3072")];
	tensor<int32, [4]> var_3077 = const()[name = string("op_3077"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_3087 = const()[name = string("op_3087"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_3078 = transpose(perm = var_3077, x = var_3072)[name = string("transpose_47")];
	tensor<fp16, [1, 8, 256]> x_141 = reshape(shape = var_3087, x = var_3078)[name = string("x_141")];
	int32 var_3093 = const()[name = string("op_3093"), val = int32(-1)];
	fp16 const_80_promoted_to_fp16 = const()[name = string("const_80_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_3099_cast_fp16 = mul(x = x_141, y = const_80_promoted_to_fp16)[name = string("op_3099_cast_fp16")];
	bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_139_cast_fp16 = concat(axis = var_3093, interleave = input_139_interleave_0, values = (x_141, var_3099_cast_fp16))[name = string("input_139_cast_fp16")];
	tensor<int32, [1]> normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3091_to_fp16 = const()[name = string("op_3091_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_3091_to_fp16, x = input_139_cast_fp16)[name = string("normed_137_cast_fp16")];
	tensor<int32, [2]> var_3104_split_sizes_0 = const()[name = string("op_3104_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_3104_axis_0 = const()[name = string("op_3104_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_3104_cast_fp16_0, tensor<fp16, [1, 8, 256]> var_3104_cast_fp16_1 = split(axis = var_3104_axis_0, split_sizes = var_3104_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_3104_cast_fp16")];
	tensor<fp16, [1, 8, 256]> var_3107_cast_fp16 = mul(x = var_3104_cast_fp16_0, y = const_21_to_fp16)[name = string("op_3107_cast_fp16")];
	tensor<int32, [4]> var_3113 = const()[name = string("op_3113"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_39 = reshape(shape = var_3113, x = var_3107_cast_fp16)[name = string("q_39")];
	tensor<fp16, [1, 8, 1, 256]> var_3115_cast_fp16 = mul(x = q_39, y = cos_s)[name = string("op_3115_cast_fp16")];
	tensor<int32, [2]> var_3116_split_sizes_0 = const()[name = string("op_3116_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_3116_axis_0 = const()[name = string("op_3116_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_3116_0, tensor<fp16, [1, 8, 1, 128]> var_3116_1 = split(axis = var_3116_axis_0, split_sizes = var_3116_split_sizes_0, x = q_39)[name = string("op_3116")];
	fp16 const_82_promoted = const()[name = string("const_82_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_3118 = mul(x = var_3116_1, y = const_82_promoted)[name = string("op_3118")];
	int32 var_3120 = const()[name = string("op_3120"), val = int32(-1)];
	bool var_3121_interleave_0 = const()[name = string("op_3121_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_3121 = concat(axis = var_3120, interleave = var_3121_interleave_0, values = (var_3118, var_3116_0))[name = string("op_3121")];
	tensor<fp16, [1, 8, 1, 256]> var_3122_cast_fp16 = mul(x = var_3121, y = sin_s)[name = string("op_3122_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_41_cast_fp16 = add(x = var_3115_cast_fp16, y = var_3122_cast_fp16)[name = string("q_41_cast_fp16")];
	bool var_3146_transpose_x_0 = const()[name = string("op_3146_transpose_x_0"), val = bool(false)];
	bool var_3146_transpose_y_0 = const()[name = string("op_3146_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> var_3146_cast_fp16 = matmul(transpose_x = var_3146_transpose_x_0, transpose_y = var_3146_transpose_y_0, x = q_41_cast_fp16, y = transpose_49_cast_fp16)[name = string("op_3146_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> var_3153_cast_fp16 = add(x = var_3146_cast_fp16, y = causal_mask)[name = string("op_3153_cast_fp16")];
	int32 var_3154 = const()[name = string("op_3154"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 512]> var_3156_cast_fp16 = softmax(axis = var_3154, x = var_3153_cast_fp16)[name = string("op_3156_cast_fp16")];
	bool var_3172_transpose_x_0 = const()[name = string("op_3172_transpose_x_0"), val = bool(false)];
	bool var_3172_transpose_y_0 = const()[name = string("op_3172_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_3172_cast_fp16 = matmul(transpose_x = var_3172_transpose_x_0, transpose_y = var_3172_transpose_y_0, x = var_3156_cast_fp16, y = Ve_3_cast_fp16)[name = string("op_3172_cast_fp16")];
	tensor<int32, [4]> var_3182 = const()[name = string("op_3182"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_3189 = const()[name = string("op_3189"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_3183 = transpose(perm = var_3182, x = var_3172_cast_fp16)[name = string("transpose_46")];
	tensor<fp16, [1, 1, 2048]> var_3190 = reshape(shape = var_3189, x = var_3183)[name = string("op_3190")];
	tensor<int32, [3]> var_3194 = const()[name = string("op_3194"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_5_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334068928))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335641856))))[name = string("squeeze_5_palettized")];
	string var_3210_pad_type_0 = const()[name = string("op_3210_pad_type_0"), val = string("valid")];
	int32 var_3210_groups_0 = const()[name = string("op_3210_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_3210_strides_0 = const()[name = string("op_3210_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_3210_pad_0 = const()[name = string("op_3210_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_3210_dilations_0 = const()[name = string("op_3210_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 2048, 1]> var_3195 = transpose(perm = var_3194, x = var_3190)[name = string("transpose_45")];
	tensor<fp16, [1, 1536, 1]> var_3210 = conv(dilations = var_3210_dilations_0, groups = var_3210_groups_0, pad = var_3210_pad_0, pad_type = var_3210_pad_type_0, strides = var_3210_strides_0, weight = squeeze_5_palettized, x = var_3195)[name = string("op_3210")];
	tensor<int32, [3]> var_3214 = const()[name = string("op_3214"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3220 = const()[name = string("op_3220"), val = int32(-1)];
	fp16 const_83_promoted_to_fp16 = const()[name = string("const_83_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_145 = transpose(perm = var_3214, x = var_3210)[name = string("transpose_44")];
	tensor<fp16, [1, 1, 1536]> var_3226_cast_fp16 = mul(x = x_145, y = const_83_promoted_to_fp16)[name = string("op_3226_cast_fp16")];
	bool input_143_interleave_0 = const()[name = string("input_143_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_143_cast_fp16 = concat(axis = var_3220, interleave = input_143_interleave_0, values = (x_145, var_3226_cast_fp16))[name = string("input_143_cast_fp16")];
	tensor<int32, [1]> normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3218_to_fp16 = const()[name = string("op_3218_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_3218_to_fp16, x = input_143_cast_fp16)[name = string("normed_141_cast_fp16")];
	tensor<int32, [2]> var_3231_split_sizes_0 = const()[name = string("op_3231_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3231_axis_0 = const()[name = string("op_3231_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3231_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3231_cast_fp16_1 = split(axis = var_3231_axis_0, split_sizes = var_3231_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_3231_cast_fp16")];
	tensor<fp16, [1536]> const_84_to_fp16 = const()[name = string("const_84_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335643456)))];
	tensor<fp16, [1, 1, 1536]> var_3234_cast_fp16 = mul(x = var_3231_cast_fp16_0, y = const_84_to_fp16)[name = string("op_3234_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_149_cast_fp16 = add(x = x_137_cast_fp16, y = var_3234_cast_fp16)[name = string("x_149_cast_fp16")];
	int32 var_3241 = const()[name = string("op_3241"), val = int32(-1)];
	fp16 const_85_promoted_to_fp16 = const()[name = string("const_85_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_3247_cast_fp16 = mul(x = x_149_cast_fp16, y = const_85_promoted_to_fp16)[name = string("op_3247_cast_fp16")];
	bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_145_cast_fp16 = concat(axis = var_3241, interleave = input_145_interleave_0, values = (x_149_cast_fp16, var_3247_cast_fp16))[name = string("input_145_cast_fp16")];
	tensor<int32, [1]> normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3239_to_fp16 = const()[name = string("op_3239_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_3239_to_fp16, x = input_145_cast_fp16)[name = string("normed_145_cast_fp16")];
	tensor<int32, [2]> var_3252_split_sizes_0 = const()[name = string("op_3252_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3252_axis_0 = const()[name = string("op_3252_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3252_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3252_cast_fp16_1 = split(axis = var_3252_axis_0, split_sizes = var_3252_split_sizes_0, x = normed_145_cast_fp16)[name = string("op_3252_cast_fp16")];
	tensor<fp16, [1536]> const_86_to_fp16 = const()[name = string("const_86_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335646592)))];
	tensor<fp16, [1, 1, 1536]> var_3255_cast_fp16 = mul(x = var_3252_cast_fp16_0, y = const_86_to_fp16)[name = string("op_3255_cast_fp16")];
	tensor<int32, [3]> var_3268 = const()[name = string("op_3268"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_147_axes_0 = const()[name = string("input_147_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3269 = transpose(perm = var_3268, x = var_3255_cast_fp16)[name = string("transpose_43")];
	tensor<fp16, [1, 1536, 1, 1]> input_147 = expand_dims(axes = input_147_axes_0, x = var_3269)[name = string("input_147")];
	string var_3282_pad_type_0 = const()[name = string("op_3282_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3282_strides_0 = const()[name = string("op_3282_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3282_pad_0 = const()[name = string("op_3282_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3282_dilations_0 = const()[name = string("op_3282_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3282_groups_0 = const()[name = string("op_3282_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_3282 = conv(dilations = var_3282_dilations_0, groups = var_3282_groups_0, pad = var_3282_pad_0, pad_type = var_3282_pad_type_0, strides = var_3282_strides_0, weight = layers_5_mlp_gate_proj_weight_palettized, x = input_147)[name = string("op_3282")];
	string var_3284_mode_0 = const()[name = string("op_3284_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> var_3284 = gelu(mode = var_3284_mode_0, x = var_3282)[name = string("op_3284")];
	string var_3295_pad_type_0 = const()[name = string("op_3295_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3295_strides_0 = const()[name = string("op_3295_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3295_pad_0 = const()[name = string("op_3295_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3295_dilations_0 = const()[name = string("op_3295_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3295_groups_0 = const()[name = string("op_3295_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_3295 = conv(dilations = var_3295_dilations_0, groups = var_3295_groups_0, pad = var_3295_pad_0, pad_type = var_3295_pad_type_0, strides = var_3295_strides_0, weight = layers_5_mlp_up_proj_weight_palettized, x = input_147)[name = string("op_3295")];
	tensor<fp16, [1, 12288, 1, 1]> input_149 = mul(x = var_3284, y = var_3295)[name = string("input_149")];
	string var_3307_pad_type_0 = const()[name = string("op_3307_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3307_strides_0 = const()[name = string("op_3307_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3307_pad_0 = const()[name = string("op_3307_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3307_dilations_0 = const()[name = string("op_3307_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3307_groups_0 = const()[name = string("op_3307_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> var_3307 = conv(dilations = var_3307_dilations_0, groups = var_3307_groups_0, pad = var_3307_pad_0, pad_type = var_3307_pad_type_0, strides = var_3307_strides_0, weight = layers_5_mlp_down_proj_weight_palettized, x = input_149)[name = string("op_3307")];
	tensor<int32, [1]> var_3309_axes_0 = const()[name = string("op_3309_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3309 = squeeze(axes = var_3309_axes_0, x = var_3307)[name = string("op_3309")];
	tensor<int32, [3]> var_3313 = const()[name = string("op_3313"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3319 = const()[name = string("op_3319"), val = int32(-1)];
	fp16 const_87_promoted_to_fp16 = const()[name = string("const_87_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_153 = transpose(perm = var_3313, x = var_3309)[name = string("transpose_42")];
	tensor<fp16, [1, 1, 1536]> var_3325_cast_fp16 = mul(x = x_153, y = const_87_promoted_to_fp16)[name = string("op_3325_cast_fp16")];
	bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_151_cast_fp16 = concat(axis = var_3319, interleave = input_151_interleave_0, values = (x_153, var_3325_cast_fp16))[name = string("input_151_cast_fp16")];
	tensor<int32, [1]> normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3317_to_fp16 = const()[name = string("op_3317_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_3317_to_fp16, x = input_151_cast_fp16)[name = string("normed_149_cast_fp16")];
	tensor<int32, [2]> var_3330_split_sizes_0 = const()[name = string("op_3330_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3330_axis_0 = const()[name = string("op_3330_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3330_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3330_cast_fp16_1 = split(axis = var_3330_axis_0, split_sizes = var_3330_split_sizes_0, x = normed_149_cast_fp16)[name = string("op_3330_cast_fp16")];
	tensor<fp16, [1536]> const_88_to_fp16 = const()[name = string("const_88_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335649728)))];
	tensor<fp16, [1, 1, 1536]> var_3333_cast_fp16 = mul(x = var_3330_cast_fp16_0, y = const_88_to_fp16)[name = string("op_3333_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_83_cast_fp16 = add(x = x_149_cast_fp16, y = var_3333_cast_fp16)[name = string("hidden_states_83_cast_fp16")];
	tensor<fp16, [1, 1, 256]> var_3344 = linear(bias = linear_0_bias_0, weight = layers_5_per_layer_input_gate_weight_palettized, x = hidden_states_83_cast_fp16)[name = string("linear_10")];
	string gated_11_mode_0 = const()[name = string("gated_11_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 1, 256]> gated_11 = gelu(mode = gated_11_mode_0, x = var_3344)[name = string("gated_11")];
	tensor<int32, [3]> var_3361_begin_0 = const()[name = string("op_3361_begin_0"), val = tensor<int32, [3]>([0, 0, 4352])];
	tensor<int32, [3]> var_3361_end_0 = const()[name = string("op_3361_end_0"), val = tensor<int32, [3]>([1, 1, 4608])];
	tensor<bool, [3]> var_3361_end_mask_0 = const()[name = string("op_3361_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> var_3361_cast_fp16 = slice_by_index(begin = var_3361_begin_0, end = var_3361_end_0, end_mask = var_3361_end_mask_0, x = per_layer_combined)[name = string("op_3361_cast_fp16")];
	tensor<fp16, [1, 1, 256]> input_155_cast_fp16 = mul(x = gated_11, y = var_3361_cast_fp16)[name = string("input_155_cast_fp16")];
	tensor<fp16, [1536, 256]> layers_5_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335652864))), lut = tensor<fp16, [48, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335849536))))[name = string("layers_5_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1, 1536]> linear_11_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("linear_11_cast_fp16")];
	int32 var_3370 = const()[name = string("op_3370"), val = int32(-1)];
	fp16 const_89_promoted_to_fp16 = const()[name = string("const_89_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_3376_cast_fp16 = mul(x = linear_11_cast_fp16, y = const_89_promoted_to_fp16)[name = string("op_3376_cast_fp16")];
	bool input_157_interleave_0 = const()[name = string("input_157_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_157_cast_fp16 = concat(axis = var_3370, interleave = input_157_interleave_0, values = (linear_11_cast_fp16, var_3376_cast_fp16))[name = string("input_157_cast_fp16")];
	tensor<int32, [1]> normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3368_to_fp16 = const()[name = string("op_3368_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_3368_to_fp16, x = input_157_cast_fp16)[name = string("normed_153_cast_fp16")];
	tensor<int32, [2]> var_3381_split_sizes_0 = const()[name = string("op_3381_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3381_axis_0 = const()[name = string("op_3381_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3381_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3381_cast_fp16_1 = split(axis = var_3381_axis_0, split_sizes = var_3381_split_sizes_0, x = normed_153_cast_fp16)[name = string("op_3381_cast_fp16")];
	tensor<fp16, [1536]> const_90_to_fp16 = const()[name = string("const_90_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335851136)))];
	tensor<fp16, [1, 1, 1536]> var_3384_cast_fp16 = mul(x = var_3381_cast_fp16_0, y = const_90_to_fp16)[name = string("op_3384_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_87_cast_fp16 = add(x = hidden_states_83_cast_fp16, y = var_3384_cast_fp16)[name = string("hidden_states_87_cast_fp16")];
	tensor<fp16, [1]> layers_5_layer_scalar_to_fp16 = const()[name = string("layers_5_layer_scalar_to_fp16"), val = tensor<fp16, [1]>([0x1.5p-1])];
	tensor<fp16, [1, 1, 1536]> x_161_cast_fp16 = mul(x = hidden_states_87_cast_fp16, y = layers_5_layer_scalar_to_fp16)[name = string("x_161_cast_fp16")];
	int32 var_3392 = const()[name = string("op_3392"), val = int32(-1)];
	fp16 const_91_promoted_to_fp16 = const()[name = string("const_91_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_3398_cast_fp16 = mul(x = x_161_cast_fp16, y = const_91_promoted_to_fp16)[name = string("op_3398_cast_fp16")];
	bool input_159_interleave_0 = const()[name = string("input_159_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_159_cast_fp16 = concat(axis = var_3392, interleave = input_159_interleave_0, values = (x_161_cast_fp16, var_3398_cast_fp16))[name = string("input_159_cast_fp16")];
	tensor<int32, [1]> normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3390_to_fp16 = const()[name = string("op_3390_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_3390_to_fp16, x = input_159_cast_fp16)[name = string("normed_157_cast_fp16")];
	tensor<int32, [2]> var_3403_split_sizes_0 = const()[name = string("op_3403_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3403_axis_0 = const()[name = string("op_3403_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3403_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3403_cast_fp16_1 = split(axis = var_3403_axis_0, split_sizes = var_3403_split_sizes_0, x = normed_157_cast_fp16)[name = string("op_3403_cast_fp16")];
	tensor<fp16, [1536]> const_92_to_fp16 = const()[name = string("const_92_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335854272)))];
	tensor<fp16, [1, 1, 1536]> var_3406_cast_fp16 = mul(x = var_3403_cast_fp16_0, y = const_92_to_fp16)[name = string("op_3406_cast_fp16")];
	tensor<int32, [3]> var_3414 = const()[name = string("op_3414"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_3417_axes_0 = const()[name = string("op_3417_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3415_cast_fp16 = transpose(perm = var_3414, x = var_3406_cast_fp16)[name = string("transpose_41")];
	tensor<fp16, [1, 1536, 1, 1]> var_3417_cast_fp16 = expand_dims(axes = var_3417_axes_0, x = var_3415_cast_fp16)[name = string("op_3417_cast_fp16")];
	string var_3433_pad_type_0 = const()[name = string("op_3433_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3433_strides_0 = const()[name = string("op_3433_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3433_pad_0 = const()[name = string("op_3433_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3433_dilations_0 = const()[name = string("op_3433_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3433_groups_0 = const()[name = string("op_3433_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_3433 = conv(dilations = var_3433_dilations_0, groups = var_3433_groups_0, pad = var_3433_pad_0, pad_type = var_3433_pad_type_0, strides = var_3433_strides_0, weight = layers_6_self_attn_q_proj_weight_palettized, x = var_3417_cast_fp16)[name = string("op_3433")];
	tensor<int32, [4]> var_3438 = const()[name = string("op_3438"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_3439 = reshape(shape = var_3438, x = var_3433)[name = string("op_3439")];
	tensor<int32, [4]> var_3444 = const()[name = string("op_3444"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_3454 = const()[name = string("op_3454"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_3445 = transpose(perm = var_3444, x = var_3439)[name = string("transpose_40")];
	tensor<fp16, [1, 8, 256]> x_165 = reshape(shape = var_3454, x = var_3445)[name = string("x_165")];
	int32 var_3460 = const()[name = string("op_3460"), val = int32(-1)];
	fp16 const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_3466_cast_fp16 = mul(x = x_165, y = const_93_promoted_to_fp16)[name = string("op_3466_cast_fp16")];
	bool input_163_interleave_0 = const()[name = string("input_163_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_163_cast_fp16 = concat(axis = var_3460, interleave = input_163_interleave_0, values = (x_165, var_3466_cast_fp16))[name = string("input_163_cast_fp16")];
	tensor<int32, [1]> normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3458_to_fp16 = const()[name = string("op_3458_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_3458_to_fp16, x = input_163_cast_fp16)[name = string("normed_161_cast_fp16")];
	tensor<int32, [2]> var_3471_split_sizes_0 = const()[name = string("op_3471_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_3471_axis_0 = const()[name = string("op_3471_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_3471_cast_fp16_0, tensor<fp16, [1, 8, 256]> var_3471_cast_fp16_1 = split(axis = var_3471_axis_0, split_sizes = var_3471_split_sizes_0, x = normed_161_cast_fp16)[name = string("op_3471_cast_fp16")];
	tensor<fp16, [1, 8, 256]> var_3474_cast_fp16 = mul(x = var_3471_cast_fp16_0, y = const_21_to_fp16)[name = string("op_3474_cast_fp16")];
	tensor<int32, [4]> var_3480 = const()[name = string("op_3480"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_45 = reshape(shape = var_3480, x = var_3474_cast_fp16)[name = string("q_45")];
	tensor<fp16, [1, 8, 1, 256]> var_3482_cast_fp16 = mul(x = q_45, y = cos_s)[name = string("op_3482_cast_fp16")];
	tensor<int32, [2]> var_3483_split_sizes_0 = const()[name = string("op_3483_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_3483_axis_0 = const()[name = string("op_3483_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_3483_0, tensor<fp16, [1, 8, 1, 128]> var_3483_1 = split(axis = var_3483_axis_0, split_sizes = var_3483_split_sizes_0, x = q_45)[name = string("op_3483")];
	fp16 const_95_promoted = const()[name = string("const_95_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_3485 = mul(x = var_3483_1, y = const_95_promoted)[name = string("op_3485")];
	int32 var_3487 = const()[name = string("op_3487"), val = int32(-1)];
	bool var_3488_interleave_0 = const()[name = string("op_3488_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_3488 = concat(axis = var_3487, interleave = var_3488_interleave_0, values = (var_3485, var_3483_0))[name = string("op_3488")];
	tensor<fp16, [1, 8, 1, 256]> var_3489_cast_fp16 = mul(x = var_3488, y = sin_s)[name = string("op_3489_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_47_cast_fp16 = add(x = var_3482_cast_fp16, y = var_3489_cast_fp16)[name = string("q_47_cast_fp16")];
	bool var_3513_transpose_x_0 = const()[name = string("op_3513_transpose_x_0"), val = bool(false)];
	bool var_3513_transpose_y_0 = const()[name = string("op_3513_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> var_3513_cast_fp16 = matmul(transpose_x = var_3513_transpose_x_0, transpose_y = var_3513_transpose_y_0, x = q_47_cast_fp16, y = transpose_49_cast_fp16)[name = string("op_3513_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> var_3520_cast_fp16 = add(x = var_3513_cast_fp16, y = causal_mask)[name = string("op_3520_cast_fp16")];
	int32 var_3521 = const()[name = string("op_3521"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 512]> var_3523_cast_fp16 = softmax(axis = var_3521, x = var_3520_cast_fp16)[name = string("op_3523_cast_fp16")];
	bool var_3539_transpose_x_0 = const()[name = string("op_3539_transpose_x_0"), val = bool(false)];
	bool var_3539_transpose_y_0 = const()[name = string("op_3539_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_3539_cast_fp16 = matmul(transpose_x = var_3539_transpose_x_0, transpose_y = var_3539_transpose_y_0, x = var_3523_cast_fp16, y = Ve_3_cast_fp16)[name = string("op_3539_cast_fp16")];
	tensor<int32, [4]> var_3549 = const()[name = string("op_3549"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_3556 = const()[name = string("op_3556"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_3550 = transpose(perm = var_3549, x = var_3539_cast_fp16)[name = string("transpose_39")];
	tensor<fp16, [1, 1, 2048]> var_3557 = reshape(shape = var_3556, x = var_3550)[name = string("op_3557")];
	tensor<int32, [3]> var_3561 = const()[name = string("op_3561"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_6_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335857408))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337430336))))[name = string("squeeze_6_palettized")];
	string var_3577_pad_type_0 = const()[name = string("op_3577_pad_type_0"), val = string("valid")];
	int32 var_3577_groups_0 = const()[name = string("op_3577_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_3577_strides_0 = const()[name = string("op_3577_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_3577_pad_0 = const()[name = string("op_3577_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_3577_dilations_0 = const()[name = string("op_3577_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 2048, 1]> var_3562 = transpose(perm = var_3561, x = var_3557)[name = string("transpose_38")];
	tensor<fp16, [1, 1536, 1]> var_3577 = conv(dilations = var_3577_dilations_0, groups = var_3577_groups_0, pad = var_3577_pad_0, pad_type = var_3577_pad_type_0, strides = var_3577_strides_0, weight = squeeze_6_palettized, x = var_3562)[name = string("op_3577")];
	tensor<int32, [3]> var_3581 = const()[name = string("op_3581"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3587 = const()[name = string("op_3587"), val = int32(-1)];
	fp16 const_96_promoted_to_fp16 = const()[name = string("const_96_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_169 = transpose(perm = var_3581, x = var_3577)[name = string("transpose_37")];
	tensor<fp16, [1, 1, 1536]> var_3593_cast_fp16 = mul(x = x_169, y = const_96_promoted_to_fp16)[name = string("op_3593_cast_fp16")];
	bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_167_cast_fp16 = concat(axis = var_3587, interleave = input_167_interleave_0, values = (x_169, var_3593_cast_fp16))[name = string("input_167_cast_fp16")];
	tensor<int32, [1]> normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3585_to_fp16 = const()[name = string("op_3585_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_3585_to_fp16, x = input_167_cast_fp16)[name = string("normed_165_cast_fp16")];
	tensor<int32, [2]> var_3598_split_sizes_0 = const()[name = string("op_3598_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3598_axis_0 = const()[name = string("op_3598_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3598_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3598_cast_fp16_1 = split(axis = var_3598_axis_0, split_sizes = var_3598_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_3598_cast_fp16")];
	tensor<fp16, [1536]> const_97_to_fp16 = const()[name = string("const_97_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337431936)))];
	tensor<fp16, [1, 1, 1536]> var_3601_cast_fp16 = mul(x = var_3598_cast_fp16_0, y = const_97_to_fp16)[name = string("op_3601_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_173_cast_fp16 = add(x = x_161_cast_fp16, y = var_3601_cast_fp16)[name = string("x_173_cast_fp16")];
	int32 var_3608 = const()[name = string("op_3608"), val = int32(-1)];
	fp16 const_98_promoted_to_fp16 = const()[name = string("const_98_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_3614_cast_fp16 = mul(x = x_173_cast_fp16, y = const_98_promoted_to_fp16)[name = string("op_3614_cast_fp16")];
	bool input_169_interleave_0 = const()[name = string("input_169_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_169_cast_fp16 = concat(axis = var_3608, interleave = input_169_interleave_0, values = (x_173_cast_fp16, var_3614_cast_fp16))[name = string("input_169_cast_fp16")];
	tensor<int32, [1]> normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3606_to_fp16 = const()[name = string("op_3606_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_3606_to_fp16, x = input_169_cast_fp16)[name = string("normed_169_cast_fp16")];
	tensor<int32, [2]> var_3619_split_sizes_0 = const()[name = string("op_3619_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3619_axis_0 = const()[name = string("op_3619_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3619_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3619_cast_fp16_1 = split(axis = var_3619_axis_0, split_sizes = var_3619_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_3619_cast_fp16")];
	tensor<fp16, [1536]> const_99_to_fp16 = const()[name = string("const_99_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337435072)))];
	tensor<fp16, [1, 1, 1536]> var_3622_cast_fp16 = mul(x = var_3619_cast_fp16_0, y = const_99_to_fp16)[name = string("op_3622_cast_fp16")];
	tensor<int32, [3]> var_3635 = const()[name = string("op_3635"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_171_axes_0 = const()[name = string("input_171_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3636 = transpose(perm = var_3635, x = var_3622_cast_fp16)[name = string("transpose_36")];
	tensor<fp16, [1, 1536, 1, 1]> input_171 = expand_dims(axes = input_171_axes_0, x = var_3636)[name = string("input_171")];
	string var_3649_pad_type_0 = const()[name = string("op_3649_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3649_strides_0 = const()[name = string("op_3649_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3649_pad_0 = const()[name = string("op_3649_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3649_dilations_0 = const()[name = string("op_3649_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3649_groups_0 = const()[name = string("op_3649_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_3649 = conv(dilations = var_3649_dilations_0, groups = var_3649_groups_0, pad = var_3649_pad_0, pad_type = var_3649_pad_type_0, strides = var_3649_strides_0, weight = layers_6_mlp_gate_proj_weight_palettized, x = input_171)[name = string("op_3649")];
	string var_3651_mode_0 = const()[name = string("op_3651_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> var_3651 = gelu(mode = var_3651_mode_0, x = var_3649)[name = string("op_3651")];
	string var_3662_pad_type_0 = const()[name = string("op_3662_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3662_strides_0 = const()[name = string("op_3662_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3662_pad_0 = const()[name = string("op_3662_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3662_dilations_0 = const()[name = string("op_3662_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3662_groups_0 = const()[name = string("op_3662_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_3662 = conv(dilations = var_3662_dilations_0, groups = var_3662_groups_0, pad = var_3662_pad_0, pad_type = var_3662_pad_type_0, strides = var_3662_strides_0, weight = layers_6_mlp_up_proj_weight_palettized, x = input_171)[name = string("op_3662")];
	tensor<fp16, [1, 12288, 1, 1]> input_173 = mul(x = var_3651, y = var_3662)[name = string("input_173")];
	string var_3674_pad_type_0 = const()[name = string("op_3674_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3674_strides_0 = const()[name = string("op_3674_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3674_pad_0 = const()[name = string("op_3674_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3674_dilations_0 = const()[name = string("op_3674_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3674_groups_0 = const()[name = string("op_3674_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> var_3674 = conv(dilations = var_3674_dilations_0, groups = var_3674_groups_0, pad = var_3674_pad_0, pad_type = var_3674_pad_type_0, strides = var_3674_strides_0, weight = layers_6_mlp_down_proj_weight_palettized, x = input_173)[name = string("op_3674")];
	tensor<int32, [1]> var_3676_axes_0 = const()[name = string("op_3676_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3676 = squeeze(axes = var_3676_axes_0, x = var_3674)[name = string("op_3676")];
	tensor<int32, [3]> var_3680 = const()[name = string("op_3680"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3686 = const()[name = string("op_3686"), val = int32(-1)];
	fp16 const_100_promoted_to_fp16 = const()[name = string("const_100_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_177 = transpose(perm = var_3680, x = var_3676)[name = string("transpose_35")];
	tensor<fp16, [1, 1, 1536]> var_3692_cast_fp16 = mul(x = x_177, y = const_100_promoted_to_fp16)[name = string("op_3692_cast_fp16")];
	bool input_175_interleave_0 = const()[name = string("input_175_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_175_cast_fp16 = concat(axis = var_3686, interleave = input_175_interleave_0, values = (x_177, var_3692_cast_fp16))[name = string("input_175_cast_fp16")];
	tensor<int32, [1]> normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3684_to_fp16 = const()[name = string("op_3684_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_3684_to_fp16, x = input_175_cast_fp16)[name = string("normed_173_cast_fp16")];
	tensor<int32, [2]> var_3697_split_sizes_0 = const()[name = string("op_3697_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3697_axis_0 = const()[name = string("op_3697_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3697_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3697_cast_fp16_1 = split(axis = var_3697_axis_0, split_sizes = var_3697_split_sizes_0, x = normed_173_cast_fp16)[name = string("op_3697_cast_fp16")];
	tensor<fp16, [1536]> const_101_to_fp16 = const()[name = string("const_101_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337438208)))];
	tensor<fp16, [1, 1, 1536]> var_3700_cast_fp16 = mul(x = var_3697_cast_fp16_0, y = const_101_to_fp16)[name = string("op_3700_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_97_cast_fp16 = add(x = x_173_cast_fp16, y = var_3700_cast_fp16)[name = string("hidden_states_97_cast_fp16")];
	tensor<fp16, [1, 1, 256]> var_3711 = linear(bias = linear_0_bias_0, weight = layers_6_per_layer_input_gate_weight_palettized, x = hidden_states_97_cast_fp16)[name = string("linear_12")];
	string gated_13_mode_0 = const()[name = string("gated_13_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 1, 256]> gated_13 = gelu(mode = gated_13_mode_0, x = var_3711)[name = string("gated_13")];
	tensor<int32, [3]> var_3728_begin_0 = const()[name = string("op_3728_begin_0"), val = tensor<int32, [3]>([0, 0, 4608])];
	tensor<int32, [3]> var_3728_end_0 = const()[name = string("op_3728_end_0"), val = tensor<int32, [3]>([1, 1, 4864])];
	tensor<bool, [3]> var_3728_end_mask_0 = const()[name = string("op_3728_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> var_3728_cast_fp16 = slice_by_index(begin = var_3728_begin_0, end = var_3728_end_0, end_mask = var_3728_end_mask_0, x = per_layer_combined)[name = string("op_3728_cast_fp16")];
	tensor<fp16, [1, 1, 256]> input_179_cast_fp16 = mul(x = gated_13, y = var_3728_cast_fp16)[name = string("input_179_cast_fp16")];
	tensor<fp16, [1536, 256]> layers_6_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337441344))), lut = tensor<fp16, [48, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337638016))))[name = string("layers_6_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1, 1536]> linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_179_cast_fp16)[name = string("linear_13_cast_fp16")];
	int32 var_3737 = const()[name = string("op_3737"), val = int32(-1)];
	fp16 const_102_promoted_to_fp16 = const()[name = string("const_102_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_3743_cast_fp16 = mul(x = linear_13_cast_fp16, y = const_102_promoted_to_fp16)[name = string("op_3743_cast_fp16")];
	bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_181_cast_fp16 = concat(axis = var_3737, interleave = input_181_interleave_0, values = (linear_13_cast_fp16, var_3743_cast_fp16))[name = string("input_181_cast_fp16")];
	tensor<int32, [1]> normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3735_to_fp16 = const()[name = string("op_3735_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_3735_to_fp16, x = input_181_cast_fp16)[name = string("normed_177_cast_fp16")];
	tensor<int32, [2]> var_3748_split_sizes_0 = const()[name = string("op_3748_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3748_axis_0 = const()[name = string("op_3748_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3748_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3748_cast_fp16_1 = split(axis = var_3748_axis_0, split_sizes = var_3748_split_sizes_0, x = normed_177_cast_fp16)[name = string("op_3748_cast_fp16")];
	tensor<fp16, [1536]> const_103_to_fp16 = const()[name = string("const_103_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337639616)))];
	tensor<fp16, [1, 1, 1536]> var_3751_cast_fp16 = mul(x = var_3748_cast_fp16_0, y = const_103_to_fp16)[name = string("op_3751_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_101_cast_fp16 = add(x = hidden_states_97_cast_fp16, y = var_3751_cast_fp16)[name = string("hidden_states_101_cast_fp16")];
	tensor<fp16, [1]> layers_6_layer_scalar_to_fp16 = const()[name = string("layers_6_layer_scalar_to_fp16"), val = tensor<fp16, [1]>([0x1.34p-1])];
	tensor<fp16, [1, 1, 1536]> x_185_cast_fp16 = mul(x = hidden_states_101_cast_fp16, y = layers_6_layer_scalar_to_fp16)[name = string("x_185_cast_fp16")];
	int32 var_3759 = const()[name = string("op_3759"), val = int32(-1)];
	fp16 const_104_promoted_to_fp16 = const()[name = string("const_104_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_3765_cast_fp16 = mul(x = x_185_cast_fp16, y = const_104_promoted_to_fp16)[name = string("op_3765_cast_fp16")];
	bool input_183_interleave_0 = const()[name = string("input_183_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_183_cast_fp16 = concat(axis = var_3759, interleave = input_183_interleave_0, values = (x_185_cast_fp16, var_3765_cast_fp16))[name = string("input_183_cast_fp16")];
	tensor<int32, [1]> normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3757_to_fp16 = const()[name = string("op_3757_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_3757_to_fp16, x = input_183_cast_fp16)[name = string("normed_181_cast_fp16")];
	tensor<int32, [2]> var_3770_split_sizes_0 = const()[name = string("op_3770_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3770_axis_0 = const()[name = string("op_3770_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3770_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3770_cast_fp16_1 = split(axis = var_3770_axis_0, split_sizes = var_3770_split_sizes_0, x = normed_181_cast_fp16)[name = string("op_3770_cast_fp16")];
	tensor<fp16, [1536]> const_105_to_fp16 = const()[name = string("const_105_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337642752)))];
	tensor<fp16, [1, 1, 1536]> var_3773_cast_fp16 = mul(x = var_3770_cast_fp16_0, y = const_105_to_fp16)[name = string("op_3773_cast_fp16")];
	tensor<int32, [3]> var_3781 = const()[name = string("op_3781"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_3784_axes_0 = const()[name = string("op_3784_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_3782_cast_fp16 = transpose(perm = var_3781, x = var_3773_cast_fp16)[name = string("transpose_34")];
	tensor<fp16, [1, 1536, 1, 1]> var_3784_cast_fp16 = expand_dims(axes = var_3784_axes_0, x = var_3782_cast_fp16)[name = string("op_3784_cast_fp16")];
	string var_3800_pad_type_0 = const()[name = string("op_3800_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_3800_strides_0 = const()[name = string("op_3800_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_3800_pad_0 = const()[name = string("op_3800_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_3800_dilations_0 = const()[name = string("op_3800_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_3800_groups_0 = const()[name = string("op_3800_groups_0"), val = int32(1)];
	tensor<fp16, [1, 4096, 1, 1]> var_3800 = conv(dilations = var_3800_dilations_0, groups = var_3800_groups_0, pad = var_3800_pad_0, pad_type = var_3800_pad_type_0, strides = var_3800_strides_0, weight = layers_7_self_attn_q_proj_weight_palettized, x = var_3784_cast_fp16)[name = string("op_3800")];
	tensor<int32, [4]> var_3805 = const()[name = string("op_3805"), val = tensor<int32, [4]>([1, 8, 512, 1])];
	tensor<fp16, [1, 8, 512, 1]> var_3806 = reshape(shape = var_3805, x = var_3800)[name = string("op_3806")];
	tensor<int32, [4]> var_3811 = const()[name = string("op_3811"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_3821 = const()[name = string("op_3821"), val = tensor<int32, [3]>([1, 8, 512])];
	tensor<fp16, [1, 8, 1, 512]> var_3812 = transpose(perm = var_3811, x = var_3806)[name = string("transpose_33")];
	tensor<fp16, [1, 8, 512]> x_189 = reshape(shape = var_3821, x = var_3812)[name = string("x_189")];
	int32 var_3827 = const()[name = string("op_3827"), val = int32(-1)];
	fp16 const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 512]> var_3833_cast_fp16 = mul(x = x_189, y = const_106_promoted_to_fp16)[name = string("op_3833_cast_fp16")];
	bool input_187_interleave_0 = const()[name = string("input_187_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1024]> input_187_cast_fp16 = concat(axis = var_3827, interleave = input_187_interleave_0, values = (x_189, var_3833_cast_fp16))[name = string("input_187_cast_fp16")];
	tensor<int32, [1]> normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3825_to_fp16 = const()[name = string("op_3825_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 1024]> normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_3825_to_fp16, x = input_187_cast_fp16)[name = string("normed_185_cast_fp16")];
	tensor<int32, [2]> var_3838_split_sizes_0 = const()[name = string("op_3838_split_sizes_0"), val = tensor<int32, [2]>([512, 512])];
	int32 var_3838_axis_0 = const()[name = string("op_3838_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 512]> var_3838_cast_fp16_0, tensor<fp16, [1, 8, 512]> var_3838_cast_fp16_1 = split(axis = var_3838_axis_0, split_sizes = var_3838_split_sizes_0, x = normed_185_cast_fp16)[name = string("op_3838_cast_fp16")];
	tensor<fp16, [1, 8, 512]> var_3841_cast_fp16 = mul(x = var_3838_cast_fp16_0, y = const_39_to_fp16)[name = string("op_3841_cast_fp16")];
	tensor<int32, [4]> var_3847 = const()[name = string("op_3847"), val = tensor<int32, [4]>([1, 8, 1, 512])];
	tensor<fp16, [1, 8, 1, 512]> q_51 = reshape(shape = var_3847, x = var_3841_cast_fp16)[name = string("q_51")];
	tensor<fp16, [1, 8, 1, 512]> var_3849_cast_fp16 = mul(x = q_51, y = cos_f)[name = string("op_3849_cast_fp16")];
	tensor<int32, [2]> var_3850_split_sizes_0 = const()[name = string("op_3850_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_3850_axis_0 = const()[name = string("op_3850_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 256]> var_3850_0, tensor<fp16, [1, 8, 1, 256]> var_3850_1 = split(axis = var_3850_axis_0, split_sizes = var_3850_split_sizes_0, x = q_51)[name = string("op_3850")];
	fp16 const_108_promoted = const()[name = string("const_108_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 256]> var_3852 = mul(x = var_3850_1, y = const_108_promoted)[name = string("op_3852")];
	int32 var_3854 = const()[name = string("op_3854"), val = int32(-1)];
	bool var_3855_interleave_0 = const()[name = string("op_3855_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> var_3855 = concat(axis = var_3854, interleave = var_3855_interleave_0, values = (var_3852, var_3850_0))[name = string("op_3855")];
	tensor<fp16, [1, 8, 1, 512]> var_3856_cast_fp16 = mul(x = var_3855, y = sin_f)[name = string("op_3856_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> q_53_cast_fp16 = add(x = var_3849_cast_fp16, y = var_3856_cast_fp16)[name = string("q_53_cast_fp16")];
	bool var_3880_transpose_x_0 = const()[name = string("op_3880_transpose_x_0"), val = bool(false)];
	bool var_3880_transpose_y_0 = const()[name = string("op_3880_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> var_3880_cast_fp16 = matmul(transpose_x = var_3880_transpose_x_0, transpose_y = var_3880_transpose_y_0, x = q_53_cast_fp16, y = transpose_50_cast_fp16)[name = string("op_3880_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> var_3887_cast_fp16 = add(x = var_3880_cast_fp16, y = causal_mask)[name = string("op_3887_cast_fp16")];
	int32 var_3888 = const()[name = string("op_3888"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 512]> var_3890_cast_fp16 = softmax(axis = var_3888, x = var_3887_cast_fp16)[name = string("op_3890_cast_fp16")];
	bool var_3906_transpose_x_0 = const()[name = string("op_3906_transpose_x_0"), val = bool(false)];
	bool var_3906_transpose_y_0 = const()[name = string("op_3906_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> var_3906_cast_fp16 = matmul(transpose_x = var_3906_transpose_x_0, transpose_y = var_3906_transpose_y_0, x = var_3890_cast_fp16, y = Ve_5_cast_fp16)[name = string("op_3906_cast_fp16")];
	tensor<int32, [4]> var_3916 = const()[name = string("op_3916"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_3923 = const()[name = string("op_3923"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 512]> var_3917 = transpose(perm = var_3916, x = var_3906_cast_fp16)[name = string("transpose_32")];
	tensor<fp16, [1, 1, 4096]> var_3924 = reshape(shape = var_3923, x = var_3917)[name = string("op_3924")];
	tensor<int32, [3]> var_3928 = const()[name = string("op_3928"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1536, 4096, 1]> squeeze_7_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 4096, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337645888))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340791680))))[name = string("squeeze_7_palettized")];
	string var_3944_pad_type_0 = const()[name = string("op_3944_pad_type_0"), val = string("valid")];
	int32 var_3944_groups_0 = const()[name = string("op_3944_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_3944_strides_0 = const()[name = string("op_3944_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_3944_pad_0 = const()[name = string("op_3944_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_3944_dilations_0 = const()[name = string("op_3944_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 4096, 1]> var_3929 = transpose(perm = var_3928, x = var_3924)[name = string("transpose_31")];
	tensor<fp16, [1, 1536, 1]> var_3944 = conv(dilations = var_3944_dilations_0, groups = var_3944_groups_0, pad = var_3944_pad_0, pad_type = var_3944_pad_type_0, strides = var_3944_strides_0, weight = squeeze_7_palettized, x = var_3929)[name = string("op_3944")];
	tensor<int32, [3]> var_3948 = const()[name = string("op_3948"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_3954 = const()[name = string("op_3954"), val = int32(-1)];
	fp16 const_109_promoted_to_fp16 = const()[name = string("const_109_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_193 = transpose(perm = var_3948, x = var_3944)[name = string("transpose_30")];
	tensor<fp16, [1, 1, 1536]> var_3960_cast_fp16 = mul(x = x_193, y = const_109_promoted_to_fp16)[name = string("op_3960_cast_fp16")];
	bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_191_cast_fp16 = concat(axis = var_3954, interleave = input_191_interleave_0, values = (x_193, var_3960_cast_fp16))[name = string("input_191_cast_fp16")];
	tensor<int32, [1]> normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3952_to_fp16 = const()[name = string("op_3952_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_3952_to_fp16, x = input_191_cast_fp16)[name = string("normed_189_cast_fp16")];
	tensor<int32, [2]> var_3965_split_sizes_0 = const()[name = string("op_3965_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3965_axis_0 = const()[name = string("op_3965_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3965_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3965_cast_fp16_1 = split(axis = var_3965_axis_0, split_sizes = var_3965_split_sizes_0, x = normed_189_cast_fp16)[name = string("op_3965_cast_fp16")];
	tensor<fp16, [1536]> const_110_to_fp16 = const()[name = string("const_110_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340793280)))];
	tensor<fp16, [1, 1, 1536]> var_3968_cast_fp16 = mul(x = var_3965_cast_fp16_0, y = const_110_to_fp16)[name = string("op_3968_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_197_cast_fp16 = add(x = x_185_cast_fp16, y = var_3968_cast_fp16)[name = string("x_197_cast_fp16")];
	int32 var_3975 = const()[name = string("op_3975"), val = int32(-1)];
	fp16 const_111_promoted_to_fp16 = const()[name = string("const_111_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_3981_cast_fp16 = mul(x = x_197_cast_fp16, y = const_111_promoted_to_fp16)[name = string("op_3981_cast_fp16")];
	bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_193_cast_fp16 = concat(axis = var_3975, interleave = input_193_interleave_0, values = (x_197_cast_fp16, var_3981_cast_fp16))[name = string("input_193_cast_fp16")];
	tensor<int32, [1]> normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_3973_to_fp16 = const()[name = string("op_3973_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_3973_to_fp16, x = input_193_cast_fp16)[name = string("normed_193_cast_fp16")];
	tensor<int32, [2]> var_3986_split_sizes_0 = const()[name = string("op_3986_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_3986_axis_0 = const()[name = string("op_3986_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_3986_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3986_cast_fp16_1 = split(axis = var_3986_axis_0, split_sizes = var_3986_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_3986_cast_fp16")];
	tensor<fp16, [1536]> const_112_to_fp16 = const()[name = string("const_112_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340796416)))];
	tensor<fp16, [1, 1, 1536]> var_3989_cast_fp16 = mul(x = var_3986_cast_fp16_0, y = const_112_to_fp16)[name = string("op_3989_cast_fp16")];
	tensor<int32, [3]> var_4002 = const()[name = string("op_4002"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_195_axes_0 = const()[name = string("input_195_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_4003 = transpose(perm = var_4002, x = var_3989_cast_fp16)[name = string("transpose_29")];
	tensor<fp16, [1, 1536, 1, 1]> input_195 = expand_dims(axes = input_195_axes_0, x = var_4003)[name = string("input_195")];
	string var_4016_pad_type_0 = const()[name = string("op_4016_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_4016_strides_0 = const()[name = string("op_4016_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_4016_pad_0 = const()[name = string("op_4016_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_4016_dilations_0 = const()[name = string("op_4016_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_4016_groups_0 = const()[name = string("op_4016_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_4016 = conv(dilations = var_4016_dilations_0, groups = var_4016_groups_0, pad = var_4016_pad_0, pad_type = var_4016_pad_type_0, strides = var_4016_strides_0, weight = layers_7_mlp_gate_proj_weight_palettized, x = input_195)[name = string("op_4016")];
	string var_4018_mode_0 = const()[name = string("op_4018_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> var_4018 = gelu(mode = var_4018_mode_0, x = var_4016)[name = string("op_4018")];
	string var_4029_pad_type_0 = const()[name = string("op_4029_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_4029_strides_0 = const()[name = string("op_4029_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_4029_pad_0 = const()[name = string("op_4029_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_4029_dilations_0 = const()[name = string("op_4029_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_4029_groups_0 = const()[name = string("op_4029_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_4029 = conv(dilations = var_4029_dilations_0, groups = var_4029_groups_0, pad = var_4029_pad_0, pad_type = var_4029_pad_type_0, strides = var_4029_strides_0, weight = layers_7_mlp_up_proj_weight_palettized, x = input_195)[name = string("op_4029")];
	tensor<fp16, [1, 12288, 1, 1]> input_197 = mul(x = var_4018, y = var_4029)[name = string("input_197")];
	string var_4041_pad_type_0 = const()[name = string("op_4041_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_4041_strides_0 = const()[name = string("op_4041_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_4041_pad_0 = const()[name = string("op_4041_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_4041_dilations_0 = const()[name = string("op_4041_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_4041_groups_0 = const()[name = string("op_4041_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> var_4041 = conv(dilations = var_4041_dilations_0, groups = var_4041_groups_0, pad = var_4041_pad_0, pad_type = var_4041_pad_type_0, strides = var_4041_strides_0, weight = layers_7_mlp_down_proj_weight_palettized, x = input_197)[name = string("op_4041")];
	tensor<int32, [1]> var_4043_axes_0 = const()[name = string("op_4043_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_4043 = squeeze(axes = var_4043_axes_0, x = var_4041)[name = string("op_4043")];
	tensor<int32, [3]> var_4047 = const()[name = string("op_4047"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_4053 = const()[name = string("op_4053"), val = int32(-1)];
	fp16 const_113_promoted_to_fp16 = const()[name = string("const_113_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_201 = transpose(perm = var_4047, x = var_4043)[name = string("transpose_28")];
	tensor<fp16, [1, 1, 1536]> var_4059_cast_fp16 = mul(x = x_201, y = const_113_promoted_to_fp16)[name = string("op_4059_cast_fp16")];
	bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_199_cast_fp16 = concat(axis = var_4053, interleave = input_199_interleave_0, values = (x_201, var_4059_cast_fp16))[name = string("input_199_cast_fp16")];
	tensor<int32, [1]> normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4051_to_fp16 = const()[name = string("op_4051_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_4051_to_fp16, x = input_199_cast_fp16)[name = string("normed_197_cast_fp16")];
	tensor<int32, [2]> var_4064_split_sizes_0 = const()[name = string("op_4064_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4064_axis_0 = const()[name = string("op_4064_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4064_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4064_cast_fp16_1 = split(axis = var_4064_axis_0, split_sizes = var_4064_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_4064_cast_fp16")];
	tensor<fp16, [1536]> const_114_to_fp16 = const()[name = string("const_114_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340799552)))];
	tensor<fp16, [1, 1, 1536]> var_4067_cast_fp16 = mul(x = var_4064_cast_fp16_0, y = const_114_to_fp16)[name = string("op_4067_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_111_cast_fp16 = add(x = x_197_cast_fp16, y = var_4067_cast_fp16)[name = string("hidden_states_111_cast_fp16")];
	tensor<fp16, [1, 1, 256]> var_4078 = linear(bias = linear_0_bias_0, weight = layers_7_per_layer_input_gate_weight_palettized, x = hidden_states_111_cast_fp16)[name = string("linear_14")];
	string gated_15_mode_0 = const()[name = string("gated_15_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 1, 256]> gated_15 = gelu(mode = gated_15_mode_0, x = var_4078)[name = string("gated_15")];
	tensor<int32, [3]> var_4095_begin_0 = const()[name = string("op_4095_begin_0"), val = tensor<int32, [3]>([0, 0, 4864])];
	tensor<int32, [3]> var_4095_end_0 = const()[name = string("op_4095_end_0"), val = tensor<int32, [3]>([1, 1, 5120])];
	tensor<bool, [3]> var_4095_end_mask_0 = const()[name = string("op_4095_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> var_4095_cast_fp16 = slice_by_index(begin = var_4095_begin_0, end = var_4095_end_0, end_mask = var_4095_end_mask_0, x = per_layer_combined)[name = string("op_4095_cast_fp16")];
	tensor<fp16, [1, 1, 256]> input_203_cast_fp16 = mul(x = gated_15, y = var_4095_cast_fp16)[name = string("input_203_cast_fp16")];
	tensor<fp16, [1536, 256]> layers_7_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340802688))), lut = tensor<fp16, [48, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340999360))))[name = string("layers_7_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1, 1536]> linear_15_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_203_cast_fp16)[name = string("linear_15_cast_fp16")];
	int32 var_4104 = const()[name = string("op_4104"), val = int32(-1)];
	fp16 const_115_promoted_to_fp16 = const()[name = string("const_115_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_4110_cast_fp16 = mul(x = linear_15_cast_fp16, y = const_115_promoted_to_fp16)[name = string("op_4110_cast_fp16")];
	bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_205_cast_fp16 = concat(axis = var_4104, interleave = input_205_interleave_0, values = (linear_15_cast_fp16, var_4110_cast_fp16))[name = string("input_205_cast_fp16")];
	tensor<int32, [1]> normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4102_to_fp16 = const()[name = string("op_4102_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_4102_to_fp16, x = input_205_cast_fp16)[name = string("normed_201_cast_fp16")];
	tensor<int32, [2]> var_4115_split_sizes_0 = const()[name = string("op_4115_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4115_axis_0 = const()[name = string("op_4115_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4115_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4115_cast_fp16_1 = split(axis = var_4115_axis_0, split_sizes = var_4115_split_sizes_0, x = normed_201_cast_fp16)[name = string("op_4115_cast_fp16")];
	tensor<fp16, [1536]> const_116_to_fp16 = const()[name = string("const_116_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341000960)))];
	tensor<fp16, [1, 1, 1536]> var_4118_cast_fp16 = mul(x = var_4115_cast_fp16_0, y = const_116_to_fp16)[name = string("op_4118_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_115_cast_fp16 = add(x = hidden_states_111_cast_fp16, y = var_4118_cast_fp16)[name = string("hidden_states_115_cast_fp16")];
	tensor<fp16, [1]> layers_7_layer_scalar_to_fp16 = const()[name = string("layers_7_layer_scalar_to_fp16"), val = tensor<fp16, [1]>([0x1.14p-1])];
	tensor<fp16, [1, 1, 1536]> x_209_cast_fp16 = mul(x = hidden_states_115_cast_fp16, y = layers_7_layer_scalar_to_fp16)[name = string("x_209_cast_fp16")];
	int32 var_4126 = const()[name = string("op_4126"), val = int32(-1)];
	fp16 const_117_promoted_to_fp16 = const()[name = string("const_117_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_4132_cast_fp16 = mul(x = x_209_cast_fp16, y = const_117_promoted_to_fp16)[name = string("op_4132_cast_fp16")];
	bool input_207_interleave_0 = const()[name = string("input_207_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_207_cast_fp16 = concat(axis = var_4126, interleave = input_207_interleave_0, values = (x_209_cast_fp16, var_4132_cast_fp16))[name = string("input_207_cast_fp16")];
	tensor<int32, [1]> normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4124_to_fp16 = const()[name = string("op_4124_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_4124_to_fp16, x = input_207_cast_fp16)[name = string("normed_205_cast_fp16")];
	tensor<int32, [2]> var_4137_split_sizes_0 = const()[name = string("op_4137_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4137_axis_0 = const()[name = string("op_4137_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4137_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4137_cast_fp16_1 = split(axis = var_4137_axis_0, split_sizes = var_4137_split_sizes_0, x = normed_205_cast_fp16)[name = string("op_4137_cast_fp16")];
	tensor<fp16, [1536]> const_118_to_fp16 = const()[name = string("const_118_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341004096)))];
	tensor<fp16, [1, 1, 1536]> var_4140_cast_fp16 = mul(x = var_4137_cast_fp16_0, y = const_118_to_fp16)[name = string("op_4140_cast_fp16")];
	tensor<int32, [3]> var_4148 = const()[name = string("op_4148"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_4151_axes_0 = const()[name = string("op_4151_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_4149_cast_fp16 = transpose(perm = var_4148, x = var_4140_cast_fp16)[name = string("transpose_27")];
	tensor<fp16, [1, 1536, 1, 1]> var_4151_cast_fp16 = expand_dims(axes = var_4151_axes_0, x = var_4149_cast_fp16)[name = string("op_4151_cast_fp16")];
	string var_4167_pad_type_0 = const()[name = string("op_4167_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_4167_strides_0 = const()[name = string("op_4167_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_4167_pad_0 = const()[name = string("op_4167_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_4167_dilations_0 = const()[name = string("op_4167_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_4167_groups_0 = const()[name = string("op_4167_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_4167 = conv(dilations = var_4167_dilations_0, groups = var_4167_groups_0, pad = var_4167_pad_0, pad_type = var_4167_pad_type_0, strides = var_4167_strides_0, weight = layers_8_self_attn_q_proj_weight_palettized, x = var_4151_cast_fp16)[name = string("op_4167")];
	tensor<int32, [4]> var_4172 = const()[name = string("op_4172"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_4173 = reshape(shape = var_4172, x = var_4167)[name = string("op_4173")];
	tensor<int32, [4]> var_4178 = const()[name = string("op_4178"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_4188 = const()[name = string("op_4188"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_4179 = transpose(perm = var_4178, x = var_4173)[name = string("transpose_26")];
	tensor<fp16, [1, 8, 256]> x_213 = reshape(shape = var_4188, x = var_4179)[name = string("x_213")];
	int32 var_4194 = const()[name = string("op_4194"), val = int32(-1)];
	fp16 const_119_promoted_to_fp16 = const()[name = string("const_119_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_4200_cast_fp16 = mul(x = x_213, y = const_119_promoted_to_fp16)[name = string("op_4200_cast_fp16")];
	bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_211_cast_fp16 = concat(axis = var_4194, interleave = input_211_interleave_0, values = (x_213, var_4200_cast_fp16))[name = string("input_211_cast_fp16")];
	tensor<int32, [1]> normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4192_to_fp16 = const()[name = string("op_4192_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_4192_to_fp16, x = input_211_cast_fp16)[name = string("normed_209_cast_fp16")];
	tensor<int32, [2]> var_4205_split_sizes_0 = const()[name = string("op_4205_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_4205_axis_0 = const()[name = string("op_4205_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_4205_cast_fp16_0, tensor<fp16, [1, 8, 256]> var_4205_cast_fp16_1 = split(axis = var_4205_axis_0, split_sizes = var_4205_split_sizes_0, x = normed_209_cast_fp16)[name = string("op_4205_cast_fp16")];
	tensor<fp16, [1, 8, 256]> var_4208_cast_fp16 = mul(x = var_4205_cast_fp16_0, y = const_21_to_fp16)[name = string("op_4208_cast_fp16")];
	tensor<int32, [4]> var_4214 = const()[name = string("op_4214"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_57 = reshape(shape = var_4214, x = var_4208_cast_fp16)[name = string("q_57")];
	tensor<fp16, [1, 8, 1, 256]> var_4216_cast_fp16 = mul(x = q_57, y = cos_s)[name = string("op_4216_cast_fp16")];
	tensor<int32, [2]> var_4217_split_sizes_0 = const()[name = string("op_4217_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_4217_axis_0 = const()[name = string("op_4217_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_4217_0, tensor<fp16, [1, 8, 1, 128]> var_4217_1 = split(axis = var_4217_axis_0, split_sizes = var_4217_split_sizes_0, x = q_57)[name = string("op_4217")];
	fp16 const_121_promoted = const()[name = string("const_121_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_4219 = mul(x = var_4217_1, y = const_121_promoted)[name = string("op_4219")];
	int32 var_4221 = const()[name = string("op_4221"), val = int32(-1)];
	bool var_4222_interleave_0 = const()[name = string("op_4222_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_4222 = concat(axis = var_4221, interleave = var_4222_interleave_0, values = (var_4219, var_4217_0))[name = string("op_4222")];
	tensor<fp16, [1, 8, 1, 256]> var_4223_cast_fp16 = mul(x = var_4222, y = sin_s)[name = string("op_4223_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_59_cast_fp16 = add(x = var_4216_cast_fp16, y = var_4223_cast_fp16)[name = string("q_59_cast_fp16")];
	bool var_4247_transpose_x_0 = const()[name = string("op_4247_transpose_x_0"), val = bool(false)];
	bool var_4247_transpose_y_0 = const()[name = string("op_4247_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> var_4247_cast_fp16 = matmul(transpose_x = var_4247_transpose_x_0, transpose_y = var_4247_transpose_y_0, x = q_59_cast_fp16, y = transpose_49_cast_fp16)[name = string("op_4247_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> var_4254_cast_fp16 = add(x = var_4247_cast_fp16, y = causal_mask)[name = string("op_4254_cast_fp16")];
	int32 var_4255 = const()[name = string("op_4255"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 512]> var_4257_cast_fp16 = softmax(axis = var_4255, x = var_4254_cast_fp16)[name = string("op_4257_cast_fp16")];
	bool var_4273_transpose_x_0 = const()[name = string("op_4273_transpose_x_0"), val = bool(false)];
	bool var_4273_transpose_y_0 = const()[name = string("op_4273_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_4273_cast_fp16 = matmul(transpose_x = var_4273_transpose_x_0, transpose_y = var_4273_transpose_y_0, x = var_4257_cast_fp16, y = Ve_3_cast_fp16)[name = string("op_4273_cast_fp16")];
	tensor<int32, [4]> var_4283 = const()[name = string("op_4283"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_4290 = const()[name = string("op_4290"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_4284 = transpose(perm = var_4283, x = var_4273_cast_fp16)[name = string("transpose_25")];
	tensor<fp16, [1, 1, 2048]> var_4291 = reshape(shape = var_4290, x = var_4284)[name = string("op_4291")];
	tensor<int32, [3]> var_4295 = const()[name = string("op_4295"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_8_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341007232))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342580160))))[name = string("squeeze_8_palettized")];
	string var_4311_pad_type_0 = const()[name = string("op_4311_pad_type_0"), val = string("valid")];
	int32 var_4311_groups_0 = const()[name = string("op_4311_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_4311_strides_0 = const()[name = string("op_4311_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_4311_pad_0 = const()[name = string("op_4311_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_4311_dilations_0 = const()[name = string("op_4311_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 2048, 1]> var_4296 = transpose(perm = var_4295, x = var_4291)[name = string("transpose_24")];
	tensor<fp16, [1, 1536, 1]> var_4311 = conv(dilations = var_4311_dilations_0, groups = var_4311_groups_0, pad = var_4311_pad_0, pad_type = var_4311_pad_type_0, strides = var_4311_strides_0, weight = squeeze_8_palettized, x = var_4296)[name = string("op_4311")];
	tensor<int32, [3]> var_4315 = const()[name = string("op_4315"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_4321 = const()[name = string("op_4321"), val = int32(-1)];
	fp16 const_122_promoted_to_fp16 = const()[name = string("const_122_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_217 = transpose(perm = var_4315, x = var_4311)[name = string("transpose_23")];
	tensor<fp16, [1, 1, 1536]> var_4327_cast_fp16 = mul(x = x_217, y = const_122_promoted_to_fp16)[name = string("op_4327_cast_fp16")];
	bool input_215_interleave_0 = const()[name = string("input_215_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_215_cast_fp16 = concat(axis = var_4321, interleave = input_215_interleave_0, values = (x_217, var_4327_cast_fp16))[name = string("input_215_cast_fp16")];
	tensor<int32, [1]> normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4319_to_fp16 = const()[name = string("op_4319_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_4319_to_fp16, x = input_215_cast_fp16)[name = string("normed_213_cast_fp16")];
	tensor<int32, [2]> var_4332_split_sizes_0 = const()[name = string("op_4332_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4332_axis_0 = const()[name = string("op_4332_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4332_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4332_cast_fp16_1 = split(axis = var_4332_axis_0, split_sizes = var_4332_split_sizes_0, x = normed_213_cast_fp16)[name = string("op_4332_cast_fp16")];
	tensor<fp16, [1536]> const_123_to_fp16 = const()[name = string("const_123_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342581760)))];
	tensor<fp16, [1, 1, 1536]> var_4335_cast_fp16 = mul(x = var_4332_cast_fp16_0, y = const_123_to_fp16)[name = string("op_4335_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_221_cast_fp16 = add(x = x_209_cast_fp16, y = var_4335_cast_fp16)[name = string("x_221_cast_fp16")];
	int32 var_4342 = const()[name = string("op_4342"), val = int32(-1)];
	fp16 const_124_promoted_to_fp16 = const()[name = string("const_124_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_4348_cast_fp16 = mul(x = x_221_cast_fp16, y = const_124_promoted_to_fp16)[name = string("op_4348_cast_fp16")];
	bool input_217_interleave_0 = const()[name = string("input_217_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_217_cast_fp16 = concat(axis = var_4342, interleave = input_217_interleave_0, values = (x_221_cast_fp16, var_4348_cast_fp16))[name = string("input_217_cast_fp16")];
	tensor<int32, [1]> normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4340_to_fp16 = const()[name = string("op_4340_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_4340_to_fp16, x = input_217_cast_fp16)[name = string("normed_217_cast_fp16")];
	tensor<int32, [2]> var_4353_split_sizes_0 = const()[name = string("op_4353_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4353_axis_0 = const()[name = string("op_4353_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4353_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4353_cast_fp16_1 = split(axis = var_4353_axis_0, split_sizes = var_4353_split_sizes_0, x = normed_217_cast_fp16)[name = string("op_4353_cast_fp16")];
	tensor<fp16, [1536]> const_125_to_fp16 = const()[name = string("const_125_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342584896)))];
	tensor<fp16, [1, 1, 1536]> var_4356_cast_fp16 = mul(x = var_4353_cast_fp16_0, y = const_125_to_fp16)[name = string("op_4356_cast_fp16")];
	tensor<int32, [3]> var_4369 = const()[name = string("op_4369"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_219_axes_0 = const()[name = string("input_219_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_4370 = transpose(perm = var_4369, x = var_4356_cast_fp16)[name = string("transpose_22")];
	tensor<fp16, [1, 1536, 1, 1]> input_219 = expand_dims(axes = input_219_axes_0, x = var_4370)[name = string("input_219")];
	string var_4383_pad_type_0 = const()[name = string("op_4383_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_4383_strides_0 = const()[name = string("op_4383_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_4383_pad_0 = const()[name = string("op_4383_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_4383_dilations_0 = const()[name = string("op_4383_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_4383_groups_0 = const()[name = string("op_4383_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_4383 = conv(dilations = var_4383_dilations_0, groups = var_4383_groups_0, pad = var_4383_pad_0, pad_type = var_4383_pad_type_0, strides = var_4383_strides_0, weight = layers_8_mlp_gate_proj_weight_palettized, x = input_219)[name = string("op_4383")];
	string var_4385_mode_0 = const()[name = string("op_4385_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> var_4385 = gelu(mode = var_4385_mode_0, x = var_4383)[name = string("op_4385")];
	string var_4396_pad_type_0 = const()[name = string("op_4396_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_4396_strides_0 = const()[name = string("op_4396_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_4396_pad_0 = const()[name = string("op_4396_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_4396_dilations_0 = const()[name = string("op_4396_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_4396_groups_0 = const()[name = string("op_4396_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_4396 = conv(dilations = var_4396_dilations_0, groups = var_4396_groups_0, pad = var_4396_pad_0, pad_type = var_4396_pad_type_0, strides = var_4396_strides_0, weight = layers_8_mlp_up_proj_weight_palettized, x = input_219)[name = string("op_4396")];
	tensor<fp16, [1, 12288, 1, 1]> input_221 = mul(x = var_4385, y = var_4396)[name = string("input_221")];
	string var_4408_pad_type_0 = const()[name = string("op_4408_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_4408_strides_0 = const()[name = string("op_4408_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_4408_pad_0 = const()[name = string("op_4408_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_4408_dilations_0 = const()[name = string("op_4408_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_4408_groups_0 = const()[name = string("op_4408_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> var_4408 = conv(dilations = var_4408_dilations_0, groups = var_4408_groups_0, pad = var_4408_pad_0, pad_type = var_4408_pad_type_0, strides = var_4408_strides_0, weight = layers_8_mlp_down_proj_weight_palettized, x = input_221)[name = string("op_4408")];
	tensor<int32, [1]> var_4410_axes_0 = const()[name = string("op_4410_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_4410 = squeeze(axes = var_4410_axes_0, x = var_4408)[name = string("op_4410")];
	tensor<int32, [3]> var_4414 = const()[name = string("op_4414"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_4420 = const()[name = string("op_4420"), val = int32(-1)];
	fp16 const_126_promoted_to_fp16 = const()[name = string("const_126_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_225 = transpose(perm = var_4414, x = var_4410)[name = string("transpose_21")];
	tensor<fp16, [1, 1, 1536]> var_4426_cast_fp16 = mul(x = x_225, y = const_126_promoted_to_fp16)[name = string("op_4426_cast_fp16")];
	bool input_223_interleave_0 = const()[name = string("input_223_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_223_cast_fp16 = concat(axis = var_4420, interleave = input_223_interleave_0, values = (x_225, var_4426_cast_fp16))[name = string("input_223_cast_fp16")];
	tensor<int32, [1]> normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4418_to_fp16 = const()[name = string("op_4418_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_4418_to_fp16, x = input_223_cast_fp16)[name = string("normed_221_cast_fp16")];
	tensor<int32, [2]> var_4431_split_sizes_0 = const()[name = string("op_4431_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4431_axis_0 = const()[name = string("op_4431_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4431_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4431_cast_fp16_1 = split(axis = var_4431_axis_0, split_sizes = var_4431_split_sizes_0, x = normed_221_cast_fp16)[name = string("op_4431_cast_fp16")];
	tensor<fp16, [1536]> const_127_to_fp16 = const()[name = string("const_127_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342588032)))];
	tensor<fp16, [1, 1, 1536]> var_4434_cast_fp16 = mul(x = var_4431_cast_fp16_0, y = const_127_to_fp16)[name = string("op_4434_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_125_cast_fp16 = add(x = x_221_cast_fp16, y = var_4434_cast_fp16)[name = string("hidden_states_125_cast_fp16")];
	tensor<fp16, [1, 1, 256]> var_4445 = linear(bias = linear_0_bias_0, weight = layers_8_per_layer_input_gate_weight_palettized, x = hidden_states_125_cast_fp16)[name = string("linear_16")];
	string gated_17_mode_0 = const()[name = string("gated_17_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 1, 256]> gated_17 = gelu(mode = gated_17_mode_0, x = var_4445)[name = string("gated_17")];
	tensor<int32, [3]> var_4462_begin_0 = const()[name = string("op_4462_begin_0"), val = tensor<int32, [3]>([0, 0, 5120])];
	tensor<int32, [3]> var_4462_end_0 = const()[name = string("op_4462_end_0"), val = tensor<int32, [3]>([1, 1, 5376])];
	tensor<bool, [3]> var_4462_end_mask_0 = const()[name = string("op_4462_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> var_4462_cast_fp16 = slice_by_index(begin = var_4462_begin_0, end = var_4462_end_0, end_mask = var_4462_end_mask_0, x = per_layer_combined)[name = string("op_4462_cast_fp16")];
	tensor<fp16, [1, 1, 256]> input_227_cast_fp16 = mul(x = gated_17, y = var_4462_cast_fp16)[name = string("input_227_cast_fp16")];
	tensor<fp16, [1536, 256]> layers_8_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342591168))), lut = tensor<fp16, [48, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342787840))))[name = string("layers_8_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1, 1536]> linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_227_cast_fp16)[name = string("linear_17_cast_fp16")];
	int32 var_4471 = const()[name = string("op_4471"), val = int32(-1)];
	fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_4477_cast_fp16 = mul(x = linear_17_cast_fp16, y = const_128_promoted_to_fp16)[name = string("op_4477_cast_fp16")];
	bool input_229_interleave_0 = const()[name = string("input_229_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_229_cast_fp16 = concat(axis = var_4471, interleave = input_229_interleave_0, values = (linear_17_cast_fp16, var_4477_cast_fp16))[name = string("input_229_cast_fp16")];
	tensor<int32, [1]> normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4469_to_fp16 = const()[name = string("op_4469_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_4469_to_fp16, x = input_229_cast_fp16)[name = string("normed_225_cast_fp16")];
	tensor<int32, [2]> var_4482_split_sizes_0 = const()[name = string("op_4482_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4482_axis_0 = const()[name = string("op_4482_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4482_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4482_cast_fp16_1 = split(axis = var_4482_axis_0, split_sizes = var_4482_split_sizes_0, x = normed_225_cast_fp16)[name = string("op_4482_cast_fp16")];
	tensor<fp16, [1536]> const_129_to_fp16 = const()[name = string("const_129_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342789440)))];
	tensor<fp16, [1, 1, 1536]> var_4485_cast_fp16 = mul(x = var_4482_cast_fp16_0, y = const_129_to_fp16)[name = string("op_4485_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_129_cast_fp16 = add(x = hidden_states_125_cast_fp16, y = var_4485_cast_fp16)[name = string("hidden_states_129_cast_fp16")];
	tensor<fp16, [1]> layers_8_layer_scalar_to_fp16 = const()[name = string("layers_8_layer_scalar_to_fp16"), val = tensor<fp16, [1]>([0x1.fap-2])];
	tensor<fp16, [1, 1, 1536]> x_233_cast_fp16 = mul(x = hidden_states_129_cast_fp16, y = layers_8_layer_scalar_to_fp16)[name = string("x_233_cast_fp16")];
	int32 var_4493 = const()[name = string("op_4493"), val = int32(-1)];
	fp16 const_130_promoted_to_fp16 = const()[name = string("const_130_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_4499_cast_fp16 = mul(x = x_233_cast_fp16, y = const_130_promoted_to_fp16)[name = string("op_4499_cast_fp16")];
	bool input_231_interleave_0 = const()[name = string("input_231_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_231_cast_fp16 = concat(axis = var_4493, interleave = input_231_interleave_0, values = (x_233_cast_fp16, var_4499_cast_fp16))[name = string("input_231_cast_fp16")];
	tensor<int32, [1]> normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4491_to_fp16 = const()[name = string("op_4491_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_4491_to_fp16, x = input_231_cast_fp16)[name = string("normed_229_cast_fp16")];
	tensor<int32, [2]> var_4504_split_sizes_0 = const()[name = string("op_4504_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4504_axis_0 = const()[name = string("op_4504_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4504_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4504_cast_fp16_1 = split(axis = var_4504_axis_0, split_sizes = var_4504_split_sizes_0, x = normed_229_cast_fp16)[name = string("op_4504_cast_fp16")];
	tensor<fp16, [1536]> const_131_to_fp16 = const()[name = string("const_131_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342792576)))];
	tensor<fp16, [1, 1, 1536]> var_4507_cast_fp16 = mul(x = var_4504_cast_fp16_0, y = const_131_to_fp16)[name = string("op_4507_cast_fp16")];
	tensor<int32, [3]> var_4515 = const()[name = string("op_4515"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_4518_axes_0 = const()[name = string("op_4518_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_4516_cast_fp16 = transpose(perm = var_4515, x = var_4507_cast_fp16)[name = string("transpose_20")];
	tensor<fp16, [1, 1536, 1, 1]> var_4518_cast_fp16 = expand_dims(axes = var_4518_axes_0, x = var_4516_cast_fp16)[name = string("op_4518_cast_fp16")];
	string var_4534_pad_type_0 = const()[name = string("op_4534_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_4534_strides_0 = const()[name = string("op_4534_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_4534_pad_0 = const()[name = string("op_4534_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_4534_dilations_0 = const()[name = string("op_4534_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_4534_groups_0 = const()[name = string("op_4534_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_4534 = conv(dilations = var_4534_dilations_0, groups = var_4534_groups_0, pad = var_4534_pad_0, pad_type = var_4534_pad_type_0, strides = var_4534_strides_0, weight = layers_9_self_attn_q_proj_weight_palettized, x = var_4518_cast_fp16)[name = string("op_4534")];
	tensor<int32, [4]> var_4539 = const()[name = string("op_4539"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_4540 = reshape(shape = var_4539, x = var_4534)[name = string("op_4540")];
	tensor<int32, [4]> var_4545 = const()[name = string("op_4545"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_4555 = const()[name = string("op_4555"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_4546 = transpose(perm = var_4545, x = var_4540)[name = string("transpose_19")];
	tensor<fp16, [1, 8, 256]> x_237 = reshape(shape = var_4555, x = var_4546)[name = string("x_237")];
	int32 var_4561 = const()[name = string("op_4561"), val = int32(-1)];
	fp16 const_132_promoted_to_fp16 = const()[name = string("const_132_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_4567_cast_fp16 = mul(x = x_237, y = const_132_promoted_to_fp16)[name = string("op_4567_cast_fp16")];
	bool input_235_interleave_0 = const()[name = string("input_235_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_235_cast_fp16 = concat(axis = var_4561, interleave = input_235_interleave_0, values = (x_237, var_4567_cast_fp16))[name = string("input_235_cast_fp16")];
	tensor<int32, [1]> normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4559_to_fp16 = const()[name = string("op_4559_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_4559_to_fp16, x = input_235_cast_fp16)[name = string("normed_233_cast_fp16")];
	tensor<int32, [2]> var_4572_split_sizes_0 = const()[name = string("op_4572_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_4572_axis_0 = const()[name = string("op_4572_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_4572_cast_fp16_0, tensor<fp16, [1, 8, 256]> var_4572_cast_fp16_1 = split(axis = var_4572_axis_0, split_sizes = var_4572_split_sizes_0, x = normed_233_cast_fp16)[name = string("op_4572_cast_fp16")];
	tensor<fp16, [1, 8, 256]> var_4575_cast_fp16 = mul(x = var_4572_cast_fp16_0, y = const_21_to_fp16)[name = string("op_4575_cast_fp16")];
	tensor<int32, [4]> var_4581 = const()[name = string("op_4581"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_63 = reshape(shape = var_4581, x = var_4575_cast_fp16)[name = string("q_63")];
	tensor<fp16, [1, 8, 1, 256]> var_4583_cast_fp16 = mul(x = q_63, y = cos_s)[name = string("op_4583_cast_fp16")];
	tensor<int32, [2]> var_4584_split_sizes_0 = const()[name = string("op_4584_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_4584_axis_0 = const()[name = string("op_4584_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_4584_0, tensor<fp16, [1, 8, 1, 128]> var_4584_1 = split(axis = var_4584_axis_0, split_sizes = var_4584_split_sizes_0, x = q_63)[name = string("op_4584")];
	fp16 const_134_promoted = const()[name = string("const_134_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_4586 = mul(x = var_4584_1, y = const_134_promoted)[name = string("op_4586")];
	int32 var_4588 = const()[name = string("op_4588"), val = int32(-1)];
	bool var_4589_interleave_0 = const()[name = string("op_4589_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_4589 = concat(axis = var_4588, interleave = var_4589_interleave_0, values = (var_4586, var_4584_0))[name = string("op_4589")];
	tensor<fp16, [1, 8, 1, 256]> var_4590_cast_fp16 = mul(x = var_4589, y = sin_s)[name = string("op_4590_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_65_cast_fp16 = add(x = var_4583_cast_fp16, y = var_4590_cast_fp16)[name = string("q_65_cast_fp16")];
	bool var_4614_transpose_x_0 = const()[name = string("op_4614_transpose_x_0"), val = bool(false)];
	bool var_4614_transpose_y_0 = const()[name = string("op_4614_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> var_4614_cast_fp16 = matmul(transpose_x = var_4614_transpose_x_0, transpose_y = var_4614_transpose_y_0, x = q_65_cast_fp16, y = transpose_49_cast_fp16)[name = string("op_4614_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> var_4621_cast_fp16 = add(x = var_4614_cast_fp16, y = causal_mask)[name = string("op_4621_cast_fp16")];
	int32 var_4622 = const()[name = string("op_4622"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 512]> var_4624_cast_fp16 = softmax(axis = var_4622, x = var_4621_cast_fp16)[name = string("op_4624_cast_fp16")];
	bool var_4640_transpose_x_0 = const()[name = string("op_4640_transpose_x_0"), val = bool(false)];
	bool var_4640_transpose_y_0 = const()[name = string("op_4640_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_4640_cast_fp16 = matmul(transpose_x = var_4640_transpose_x_0, transpose_y = var_4640_transpose_y_0, x = var_4624_cast_fp16, y = Ve_3_cast_fp16)[name = string("op_4640_cast_fp16")];
	tensor<int32, [4]> var_4650 = const()[name = string("op_4650"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_4657 = const()[name = string("op_4657"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_4651 = transpose(perm = var_4650, x = var_4640_cast_fp16)[name = string("transpose_18")];
	tensor<fp16, [1, 1, 2048]> var_4658 = reshape(shape = var_4657, x = var_4651)[name = string("op_4658")];
	tensor<int32, [3]> var_4662 = const()[name = string("op_4662"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_9_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342795712))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344368640))))[name = string("squeeze_9_palettized")];
	string var_4678_pad_type_0 = const()[name = string("op_4678_pad_type_0"), val = string("valid")];
	int32 var_4678_groups_0 = const()[name = string("op_4678_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_4678_strides_0 = const()[name = string("op_4678_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_4678_pad_0 = const()[name = string("op_4678_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_4678_dilations_0 = const()[name = string("op_4678_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 2048, 1]> var_4663 = transpose(perm = var_4662, x = var_4658)[name = string("transpose_17")];
	tensor<fp16, [1, 1536, 1]> var_4678 = conv(dilations = var_4678_dilations_0, groups = var_4678_groups_0, pad = var_4678_pad_0, pad_type = var_4678_pad_type_0, strides = var_4678_strides_0, weight = squeeze_9_palettized, x = var_4663)[name = string("op_4678")];
	tensor<int32, [3]> var_4682 = const()[name = string("op_4682"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_4688 = const()[name = string("op_4688"), val = int32(-1)];
	fp16 const_135_promoted_to_fp16 = const()[name = string("const_135_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_241 = transpose(perm = var_4682, x = var_4678)[name = string("transpose_16")];
	tensor<fp16, [1, 1, 1536]> var_4694_cast_fp16 = mul(x = x_241, y = const_135_promoted_to_fp16)[name = string("op_4694_cast_fp16")];
	bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_239_cast_fp16 = concat(axis = var_4688, interleave = input_239_interleave_0, values = (x_241, var_4694_cast_fp16))[name = string("input_239_cast_fp16")];
	tensor<int32, [1]> normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4686_to_fp16 = const()[name = string("op_4686_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_4686_to_fp16, x = input_239_cast_fp16)[name = string("normed_237_cast_fp16")];
	tensor<int32, [2]> var_4699_split_sizes_0 = const()[name = string("op_4699_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4699_axis_0 = const()[name = string("op_4699_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4699_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4699_cast_fp16_1 = split(axis = var_4699_axis_0, split_sizes = var_4699_split_sizes_0, x = normed_237_cast_fp16)[name = string("op_4699_cast_fp16")];
	tensor<fp16, [1536]> const_136_to_fp16 = const()[name = string("const_136_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344370240)))];
	tensor<fp16, [1, 1, 1536]> var_4702_cast_fp16 = mul(x = var_4699_cast_fp16_0, y = const_136_to_fp16)[name = string("op_4702_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_245_cast_fp16 = add(x = x_233_cast_fp16, y = var_4702_cast_fp16)[name = string("x_245_cast_fp16")];
	int32 var_4709 = const()[name = string("op_4709"), val = int32(-1)];
	fp16 const_137_promoted_to_fp16 = const()[name = string("const_137_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_4715_cast_fp16 = mul(x = x_245_cast_fp16, y = const_137_promoted_to_fp16)[name = string("op_4715_cast_fp16")];
	bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_241_cast_fp16 = concat(axis = var_4709, interleave = input_241_interleave_0, values = (x_245_cast_fp16, var_4715_cast_fp16))[name = string("input_241_cast_fp16")];
	tensor<int32, [1]> normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4707_to_fp16 = const()[name = string("op_4707_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_4707_to_fp16, x = input_241_cast_fp16)[name = string("normed_241_cast_fp16")];
	tensor<int32, [2]> var_4720_split_sizes_0 = const()[name = string("op_4720_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4720_axis_0 = const()[name = string("op_4720_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4720_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4720_cast_fp16_1 = split(axis = var_4720_axis_0, split_sizes = var_4720_split_sizes_0, x = normed_241_cast_fp16)[name = string("op_4720_cast_fp16")];
	tensor<fp16, [1536]> const_138_to_fp16 = const()[name = string("const_138_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344373376)))];
	tensor<fp16, [1, 1, 1536]> var_4723_cast_fp16 = mul(x = var_4720_cast_fp16_0, y = const_138_to_fp16)[name = string("op_4723_cast_fp16")];
	tensor<int32, [3]> var_4736 = const()[name = string("op_4736"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_243_axes_0 = const()[name = string("input_243_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_4737 = transpose(perm = var_4736, x = var_4723_cast_fp16)[name = string("transpose_15")];
	tensor<fp16, [1, 1536, 1, 1]> input_243 = expand_dims(axes = input_243_axes_0, x = var_4737)[name = string("input_243")];
	string var_4750_pad_type_0 = const()[name = string("op_4750_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_4750_strides_0 = const()[name = string("op_4750_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_4750_pad_0 = const()[name = string("op_4750_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_4750_dilations_0 = const()[name = string("op_4750_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_4750_groups_0 = const()[name = string("op_4750_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_4750 = conv(dilations = var_4750_dilations_0, groups = var_4750_groups_0, pad = var_4750_pad_0, pad_type = var_4750_pad_type_0, strides = var_4750_strides_0, weight = layers_9_mlp_gate_proj_weight_palettized, x = input_243)[name = string("op_4750")];
	string var_4752_mode_0 = const()[name = string("op_4752_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> var_4752 = gelu(mode = var_4752_mode_0, x = var_4750)[name = string("op_4752")];
	string var_4763_pad_type_0 = const()[name = string("op_4763_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_4763_strides_0 = const()[name = string("op_4763_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_4763_pad_0 = const()[name = string("op_4763_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_4763_dilations_0 = const()[name = string("op_4763_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_4763_groups_0 = const()[name = string("op_4763_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_4763 = conv(dilations = var_4763_dilations_0, groups = var_4763_groups_0, pad = var_4763_pad_0, pad_type = var_4763_pad_type_0, strides = var_4763_strides_0, weight = layers_9_mlp_up_proj_weight_palettized, x = input_243)[name = string("op_4763")];
	tensor<fp16, [1, 12288, 1, 1]> input_245 = mul(x = var_4752, y = var_4763)[name = string("input_245")];
	string var_4775_pad_type_0 = const()[name = string("op_4775_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_4775_strides_0 = const()[name = string("op_4775_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_4775_pad_0 = const()[name = string("op_4775_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_4775_dilations_0 = const()[name = string("op_4775_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_4775_groups_0 = const()[name = string("op_4775_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> var_4775 = conv(dilations = var_4775_dilations_0, groups = var_4775_groups_0, pad = var_4775_pad_0, pad_type = var_4775_pad_type_0, strides = var_4775_strides_0, weight = layers_9_mlp_down_proj_weight_palettized, x = input_245)[name = string("op_4775")];
	tensor<int32, [1]> var_4777_axes_0 = const()[name = string("op_4777_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_4777 = squeeze(axes = var_4777_axes_0, x = var_4775)[name = string("op_4777")];
	tensor<int32, [3]> var_4781 = const()[name = string("op_4781"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_4787 = const()[name = string("op_4787"), val = int32(-1)];
	fp16 const_139_promoted_to_fp16 = const()[name = string("const_139_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_249 = transpose(perm = var_4781, x = var_4777)[name = string("transpose_14")];
	tensor<fp16, [1, 1, 1536]> var_4793_cast_fp16 = mul(x = x_249, y = const_139_promoted_to_fp16)[name = string("op_4793_cast_fp16")];
	bool input_247_interleave_0 = const()[name = string("input_247_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_247_cast_fp16 = concat(axis = var_4787, interleave = input_247_interleave_0, values = (x_249, var_4793_cast_fp16))[name = string("input_247_cast_fp16")];
	tensor<int32, [1]> normed_245_axes_0 = const()[name = string("normed_245_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4785_to_fp16 = const()[name = string("op_4785_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_4785_to_fp16, x = input_247_cast_fp16)[name = string("normed_245_cast_fp16")];
	tensor<int32, [2]> var_4798_split_sizes_0 = const()[name = string("op_4798_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4798_axis_0 = const()[name = string("op_4798_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4798_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4798_cast_fp16_1 = split(axis = var_4798_axis_0, split_sizes = var_4798_split_sizes_0, x = normed_245_cast_fp16)[name = string("op_4798_cast_fp16")];
	tensor<fp16, [1536]> const_140_to_fp16 = const()[name = string("const_140_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344376512)))];
	tensor<fp16, [1, 1, 1536]> var_4801_cast_fp16 = mul(x = var_4798_cast_fp16_0, y = const_140_to_fp16)[name = string("op_4801_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_139_cast_fp16 = add(x = x_245_cast_fp16, y = var_4801_cast_fp16)[name = string("hidden_states_139_cast_fp16")];
	tensor<fp16, [1, 1, 256]> var_4812 = linear(bias = linear_0_bias_0, weight = layers_9_per_layer_input_gate_weight_palettized, x = hidden_states_139_cast_fp16)[name = string("linear_18")];
	string gated_19_mode_0 = const()[name = string("gated_19_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 1, 256]> gated_19 = gelu(mode = gated_19_mode_0, x = var_4812)[name = string("gated_19")];
	tensor<int32, [3]> var_4829_begin_0 = const()[name = string("op_4829_begin_0"), val = tensor<int32, [3]>([0, 0, 5376])];
	tensor<int32, [3]> var_4829_end_0 = const()[name = string("op_4829_end_0"), val = tensor<int32, [3]>([1, 1, 5632])];
	tensor<bool, [3]> var_4829_end_mask_0 = const()[name = string("op_4829_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> var_4829_cast_fp16 = slice_by_index(begin = var_4829_begin_0, end = var_4829_end_0, end_mask = var_4829_end_mask_0, x = per_layer_combined)[name = string("op_4829_cast_fp16")];
	tensor<fp16, [1, 1, 256]> input_251_cast_fp16 = mul(x = gated_19, y = var_4829_cast_fp16)[name = string("input_251_cast_fp16")];
	tensor<fp16, [1536, 256]> layers_9_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344379648))), lut = tensor<fp16, [48, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344576320))))[name = string("layers_9_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1, 1536]> linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_251_cast_fp16)[name = string("linear_19_cast_fp16")];
	int32 var_4838 = const()[name = string("op_4838"), val = int32(-1)];
	fp16 const_141_promoted_to_fp16 = const()[name = string("const_141_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_4844_cast_fp16 = mul(x = linear_19_cast_fp16, y = const_141_promoted_to_fp16)[name = string("op_4844_cast_fp16")];
	bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_253_cast_fp16 = concat(axis = var_4838, interleave = input_253_interleave_0, values = (linear_19_cast_fp16, var_4844_cast_fp16))[name = string("input_253_cast_fp16")];
	tensor<int32, [1]> normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4836_to_fp16 = const()[name = string("op_4836_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_4836_to_fp16, x = input_253_cast_fp16)[name = string("normed_249_cast_fp16")];
	tensor<int32, [2]> var_4849_split_sizes_0 = const()[name = string("op_4849_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4849_axis_0 = const()[name = string("op_4849_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4849_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4849_cast_fp16_1 = split(axis = var_4849_axis_0, split_sizes = var_4849_split_sizes_0, x = normed_249_cast_fp16)[name = string("op_4849_cast_fp16")];
	tensor<fp16, [1536]> const_142_to_fp16 = const()[name = string("const_142_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344577920)))];
	tensor<fp16, [1, 1, 1536]> var_4852_cast_fp16 = mul(x = var_4849_cast_fp16_0, y = const_142_to_fp16)[name = string("op_4852_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_143_cast_fp16 = add(x = hidden_states_139_cast_fp16, y = var_4852_cast_fp16)[name = string("hidden_states_143_cast_fp16")];
	tensor<fp16, [1]> layers_9_layer_scalar_to_fp16 = const()[name = string("layers_9_layer_scalar_to_fp16"), val = tensor<fp16, [1]>([0x1.4ap-1])];
	tensor<fp16, [1, 1, 1536]> x_257_cast_fp16 = mul(x = hidden_states_143_cast_fp16, y = layers_9_layer_scalar_to_fp16)[name = string("x_257_cast_fp16")];
	int32 var_4860 = const()[name = string("op_4860"), val = int32(-1)];
	fp16 const_143_promoted_to_fp16 = const()[name = string("const_143_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_4866_cast_fp16 = mul(x = x_257_cast_fp16, y = const_143_promoted_to_fp16)[name = string("op_4866_cast_fp16")];
	bool input_255_interleave_0 = const()[name = string("input_255_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_255_cast_fp16 = concat(axis = var_4860, interleave = input_255_interleave_0, values = (x_257_cast_fp16, var_4866_cast_fp16))[name = string("input_255_cast_fp16")];
	tensor<int32, [1]> normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4858_to_fp16 = const()[name = string("op_4858_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_4858_to_fp16, x = input_255_cast_fp16)[name = string("normed_253_cast_fp16")];
	tensor<int32, [2]> var_4871_split_sizes_0 = const()[name = string("op_4871_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_4871_axis_0 = const()[name = string("op_4871_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_4871_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4871_cast_fp16_1 = split(axis = var_4871_axis_0, split_sizes = var_4871_split_sizes_0, x = normed_253_cast_fp16)[name = string("op_4871_cast_fp16")];
	tensor<fp16, [1536]> const_144_to_fp16 = const()[name = string("const_144_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344581056)))];
	tensor<fp16, [1, 1, 1536]> var_4874_cast_fp16 = mul(x = var_4871_cast_fp16_0, y = const_144_to_fp16)[name = string("op_4874_cast_fp16")];
	tensor<int32, [3]> var_4882 = const()[name = string("op_4882"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_4885_axes_0 = const()[name = string("op_4885_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_4883_cast_fp16 = transpose(perm = var_4882, x = var_4874_cast_fp16)[name = string("transpose_13")];
	tensor<fp16, [1, 1536, 1, 1]> var_4885_cast_fp16 = expand_dims(axes = var_4885_axes_0, x = var_4883_cast_fp16)[name = string("op_4885_cast_fp16")];
	string var_4901_pad_type_0 = const()[name = string("op_4901_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_4901_strides_0 = const()[name = string("op_4901_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_4901_pad_0 = const()[name = string("op_4901_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_4901_dilations_0 = const()[name = string("op_4901_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_4901_groups_0 = const()[name = string("op_4901_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_4901 = conv(dilations = var_4901_dilations_0, groups = var_4901_groups_0, pad = var_4901_pad_0, pad_type = var_4901_pad_type_0, strides = var_4901_strides_0, weight = layers_10_self_attn_q_proj_weight_palettized, x = var_4885_cast_fp16)[name = string("op_4901")];
	tensor<int32, [4]> var_4906 = const()[name = string("op_4906"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_4907 = reshape(shape = var_4906, x = var_4901)[name = string("op_4907")];
	tensor<int32, [4]> var_4912 = const()[name = string("op_4912"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_4922 = const()[name = string("op_4922"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_4913 = transpose(perm = var_4912, x = var_4907)[name = string("transpose_12")];
	tensor<fp16, [1, 8, 256]> x_261 = reshape(shape = var_4922, x = var_4913)[name = string("x_261")];
	int32 var_4928 = const()[name = string("op_4928"), val = int32(-1)];
	fp16 const_145_promoted_to_fp16 = const()[name = string("const_145_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_4934_cast_fp16 = mul(x = x_261, y = const_145_promoted_to_fp16)[name = string("op_4934_cast_fp16")];
	bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_259_cast_fp16 = concat(axis = var_4928, interleave = input_259_interleave_0, values = (x_261, var_4934_cast_fp16))[name = string("input_259_cast_fp16")];
	tensor<int32, [1]> normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_4926_to_fp16 = const()[name = string("op_4926_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = var_4926_to_fp16, x = input_259_cast_fp16)[name = string("normed_257_cast_fp16")];
	tensor<int32, [2]> var_4939_split_sizes_0 = const()[name = string("op_4939_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_4939_axis_0 = const()[name = string("op_4939_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_4939_cast_fp16_0, tensor<fp16, [1, 8, 256]> var_4939_cast_fp16_1 = split(axis = var_4939_axis_0, split_sizes = var_4939_split_sizes_0, x = normed_257_cast_fp16)[name = string("op_4939_cast_fp16")];
	tensor<fp16, [1, 8, 256]> var_4942_cast_fp16 = mul(x = var_4939_cast_fp16_0, y = const_21_to_fp16)[name = string("op_4942_cast_fp16")];
	tensor<int32, [4]> var_4948 = const()[name = string("op_4948"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_69 = reshape(shape = var_4948, x = var_4942_cast_fp16)[name = string("q_69")];
	tensor<fp16, [1, 8, 1, 256]> var_4950_cast_fp16 = mul(x = q_69, y = cos_s)[name = string("op_4950_cast_fp16")];
	tensor<int32, [2]> var_4951_split_sizes_0 = const()[name = string("op_4951_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_4951_axis_0 = const()[name = string("op_4951_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_4951_0, tensor<fp16, [1, 8, 1, 128]> var_4951_1 = split(axis = var_4951_axis_0, split_sizes = var_4951_split_sizes_0, x = q_69)[name = string("op_4951")];
	fp16 const_147_promoted = const()[name = string("const_147_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_4953 = mul(x = var_4951_1, y = const_147_promoted)[name = string("op_4953")];
	int32 var_4955 = const()[name = string("op_4955"), val = int32(-1)];
	bool var_4956_interleave_0 = const()[name = string("op_4956_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_4956 = concat(axis = var_4955, interleave = var_4956_interleave_0, values = (var_4953, var_4951_0))[name = string("op_4956")];
	tensor<fp16, [1, 8, 1, 256]> var_4957_cast_fp16 = mul(x = var_4956, y = sin_s)[name = string("op_4957_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_71_cast_fp16 = add(x = var_4950_cast_fp16, y = var_4957_cast_fp16)[name = string("q_71_cast_fp16")];
	bool var_4981_transpose_x_0 = const()[name = string("op_4981_transpose_x_0"), val = bool(false)];
	bool var_4981_transpose_y_0 = const()[name = string("op_4981_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> var_4981_cast_fp16 = matmul(transpose_x = var_4981_transpose_x_0, transpose_y = var_4981_transpose_y_0, x = q_71_cast_fp16, y = transpose_49_cast_fp16)[name = string("op_4981_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> var_4988_cast_fp16 = add(x = var_4981_cast_fp16, y = causal_mask)[name = string("op_4988_cast_fp16")];
	int32 var_4989 = const()[name = string("op_4989"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 512]> var_4991_cast_fp16 = softmax(axis = var_4989, x = var_4988_cast_fp16)[name = string("op_4991_cast_fp16")];
	bool var_5007_transpose_x_0 = const()[name = string("op_5007_transpose_x_0"), val = bool(false)];
	bool var_5007_transpose_y_0 = const()[name = string("op_5007_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_5007_cast_fp16 = matmul(transpose_x = var_5007_transpose_x_0, transpose_y = var_5007_transpose_y_0, x = var_4991_cast_fp16, y = Ve_3_cast_fp16)[name = string("op_5007_cast_fp16")];
	tensor<int32, [4]> var_5017 = const()[name = string("op_5017"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_5024 = const()[name = string("op_5024"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_5018 = transpose(perm = var_5017, x = var_5007_cast_fp16)[name = string("transpose_11")];
	tensor<fp16, [1, 1, 2048]> var_5025 = reshape(shape = var_5024, x = var_5018)[name = string("op_5025")];
	tensor<int32, [3]> var_5029 = const()[name = string("op_5029"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_10_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344584192))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346157120))))[name = string("squeeze_10_palettized")];
	string var_5045_pad_type_0 = const()[name = string("op_5045_pad_type_0"), val = string("valid")];
	int32 var_5045_groups_0 = const()[name = string("op_5045_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_5045_strides_0 = const()[name = string("op_5045_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_5045_pad_0 = const()[name = string("op_5045_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_5045_dilations_0 = const()[name = string("op_5045_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 2048, 1]> var_5030 = transpose(perm = var_5029, x = var_5025)[name = string("transpose_10")];
	tensor<fp16, [1, 1536, 1]> var_5045 = conv(dilations = var_5045_dilations_0, groups = var_5045_groups_0, pad = var_5045_pad_0, pad_type = var_5045_pad_type_0, strides = var_5045_strides_0, weight = squeeze_10_palettized, x = var_5030)[name = string("op_5045")];
	tensor<int32, [3]> var_5049 = const()[name = string("op_5049"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_5055 = const()[name = string("op_5055"), val = int32(-1)];
	fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_265 = transpose(perm = var_5049, x = var_5045)[name = string("transpose_9")];
	tensor<fp16, [1, 1, 1536]> var_5061_cast_fp16 = mul(x = x_265, y = const_148_promoted_to_fp16)[name = string("op_5061_cast_fp16")];
	bool input_263_interleave_0 = const()[name = string("input_263_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_263_cast_fp16 = concat(axis = var_5055, interleave = input_263_interleave_0, values = (x_265, var_5061_cast_fp16))[name = string("input_263_cast_fp16")];
	tensor<int32, [1]> normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_5053_to_fp16 = const()[name = string("op_5053_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_5053_to_fp16, x = input_263_cast_fp16)[name = string("normed_261_cast_fp16")];
	tensor<int32, [2]> var_5066_split_sizes_0 = const()[name = string("op_5066_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_5066_axis_0 = const()[name = string("op_5066_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_5066_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_5066_cast_fp16_1 = split(axis = var_5066_axis_0, split_sizes = var_5066_split_sizes_0, x = normed_261_cast_fp16)[name = string("op_5066_cast_fp16")];
	tensor<fp16, [1536]> const_149_to_fp16 = const()[name = string("const_149_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346158720)))];
	tensor<fp16, [1, 1, 1536]> var_5069_cast_fp16 = mul(x = var_5066_cast_fp16_0, y = const_149_to_fp16)[name = string("op_5069_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_269_cast_fp16 = add(x = x_257_cast_fp16, y = var_5069_cast_fp16)[name = string("x_269_cast_fp16")];
	int32 var_5076 = const()[name = string("op_5076"), val = int32(-1)];
	fp16 const_150_promoted_to_fp16 = const()[name = string("const_150_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_5082_cast_fp16 = mul(x = x_269_cast_fp16, y = const_150_promoted_to_fp16)[name = string("op_5082_cast_fp16")];
	bool input_265_interleave_0 = const()[name = string("input_265_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_265_cast_fp16 = concat(axis = var_5076, interleave = input_265_interleave_0, values = (x_269_cast_fp16, var_5082_cast_fp16))[name = string("input_265_cast_fp16")];
	tensor<int32, [1]> normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_5074_to_fp16 = const()[name = string("op_5074_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_5074_to_fp16, x = input_265_cast_fp16)[name = string("normed_265_cast_fp16")];
	tensor<int32, [2]> var_5087_split_sizes_0 = const()[name = string("op_5087_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_5087_axis_0 = const()[name = string("op_5087_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_5087_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_5087_cast_fp16_1 = split(axis = var_5087_axis_0, split_sizes = var_5087_split_sizes_0, x = normed_265_cast_fp16)[name = string("op_5087_cast_fp16")];
	tensor<fp16, [1536]> const_151_to_fp16 = const()[name = string("const_151_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346161856)))];
	tensor<fp16, [1, 1, 1536]> var_5090_cast_fp16 = mul(x = var_5087_cast_fp16_0, y = const_151_to_fp16)[name = string("op_5090_cast_fp16")];
	tensor<int32, [3]> var_5103 = const()[name = string("op_5103"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_267_axes_0 = const()[name = string("input_267_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_5104 = transpose(perm = var_5103, x = var_5090_cast_fp16)[name = string("transpose_8")];
	tensor<fp16, [1, 1536, 1, 1]> input_267 = expand_dims(axes = input_267_axes_0, x = var_5104)[name = string("input_267")];
	string var_5117_pad_type_0 = const()[name = string("op_5117_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_5117_strides_0 = const()[name = string("op_5117_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_5117_pad_0 = const()[name = string("op_5117_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_5117_dilations_0 = const()[name = string("op_5117_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_5117_groups_0 = const()[name = string("op_5117_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_5117 = conv(dilations = var_5117_dilations_0, groups = var_5117_groups_0, pad = var_5117_pad_0, pad_type = var_5117_pad_type_0, strides = var_5117_strides_0, weight = layers_10_mlp_gate_proj_weight_palettized, x = input_267)[name = string("op_5117")];
	string var_5119_mode_0 = const()[name = string("op_5119_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> var_5119 = gelu(mode = var_5119_mode_0, x = var_5117)[name = string("op_5119")];
	string var_5130_pad_type_0 = const()[name = string("op_5130_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_5130_strides_0 = const()[name = string("op_5130_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_5130_pad_0 = const()[name = string("op_5130_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_5130_dilations_0 = const()[name = string("op_5130_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_5130_groups_0 = const()[name = string("op_5130_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_5130 = conv(dilations = var_5130_dilations_0, groups = var_5130_groups_0, pad = var_5130_pad_0, pad_type = var_5130_pad_type_0, strides = var_5130_strides_0, weight = layers_10_mlp_up_proj_weight_palettized, x = input_267)[name = string("op_5130")];
	tensor<fp16, [1, 12288, 1, 1]> input_269 = mul(x = var_5119, y = var_5130)[name = string("input_269")];
	string var_5142_pad_type_0 = const()[name = string("op_5142_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_5142_strides_0 = const()[name = string("op_5142_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_5142_pad_0 = const()[name = string("op_5142_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_5142_dilations_0 = const()[name = string("op_5142_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_5142_groups_0 = const()[name = string("op_5142_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> var_5142 = conv(dilations = var_5142_dilations_0, groups = var_5142_groups_0, pad = var_5142_pad_0, pad_type = var_5142_pad_type_0, strides = var_5142_strides_0, weight = layers_10_mlp_down_proj_weight_palettized, x = input_269)[name = string("op_5142")];
	tensor<int32, [1]> var_5144_axes_0 = const()[name = string("op_5144_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_5144 = squeeze(axes = var_5144_axes_0, x = var_5142)[name = string("op_5144")];
	tensor<int32, [3]> var_5148 = const()[name = string("op_5148"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_5154 = const()[name = string("op_5154"), val = int32(-1)];
	fp16 const_152_promoted_to_fp16 = const()[name = string("const_152_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_273 = transpose(perm = var_5148, x = var_5144)[name = string("transpose_7")];
	tensor<fp16, [1, 1, 1536]> var_5160_cast_fp16 = mul(x = x_273, y = const_152_promoted_to_fp16)[name = string("op_5160_cast_fp16")];
	bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_271_cast_fp16 = concat(axis = var_5154, interleave = input_271_interleave_0, values = (x_273, var_5160_cast_fp16))[name = string("input_271_cast_fp16")];
	tensor<int32, [1]> normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_5152_to_fp16 = const()[name = string("op_5152_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_5152_to_fp16, x = input_271_cast_fp16)[name = string("normed_269_cast_fp16")];
	tensor<int32, [2]> var_5165_split_sizes_0 = const()[name = string("op_5165_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_5165_axis_0 = const()[name = string("op_5165_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_5165_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_5165_cast_fp16_1 = split(axis = var_5165_axis_0, split_sizes = var_5165_split_sizes_0, x = normed_269_cast_fp16)[name = string("op_5165_cast_fp16")];
	tensor<fp16, [1536]> const_153_to_fp16 = const()[name = string("const_153_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346164992)))];
	tensor<fp16, [1, 1, 1536]> var_5168_cast_fp16 = mul(x = var_5165_cast_fp16_0, y = const_153_to_fp16)[name = string("op_5168_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_153_cast_fp16 = add(x = x_269_cast_fp16, y = var_5168_cast_fp16)[name = string("hidden_states_153_cast_fp16")];
	tensor<fp16, [1, 1, 256]> var_5179 = linear(bias = linear_0_bias_0, weight = layers_10_per_layer_input_gate_weight_palettized, x = hidden_states_153_cast_fp16)[name = string("linear_20")];
	string gated_21_mode_0 = const()[name = string("gated_21_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 1, 256]> gated_21 = gelu(mode = gated_21_mode_0, x = var_5179)[name = string("gated_21")];
	tensor<int32, [3]> var_5196_begin_0 = const()[name = string("op_5196_begin_0"), val = tensor<int32, [3]>([0, 0, 5632])];
	tensor<int32, [3]> var_5196_end_0 = const()[name = string("op_5196_end_0"), val = tensor<int32, [3]>([1, 1, 5888])];
	tensor<bool, [3]> var_5196_end_mask_0 = const()[name = string("op_5196_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> var_5196_cast_fp16 = slice_by_index(begin = var_5196_begin_0, end = var_5196_end_0, end_mask = var_5196_end_mask_0, x = per_layer_combined)[name = string("op_5196_cast_fp16")];
	tensor<fp16, [1, 1, 256]> input_275_cast_fp16 = mul(x = gated_21, y = var_5196_cast_fp16)[name = string("input_275_cast_fp16")];
	tensor<fp16, [1536, 256]> layers_10_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346168128))), lut = tensor<fp16, [48, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346364800))))[name = string("layers_10_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1, 1536]> linear_21_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("linear_21_cast_fp16")];
	int32 var_5205 = const()[name = string("op_5205"), val = int32(-1)];
	fp16 const_154_promoted_to_fp16 = const()[name = string("const_154_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_5211_cast_fp16 = mul(x = linear_21_cast_fp16, y = const_154_promoted_to_fp16)[name = string("op_5211_cast_fp16")];
	bool input_277_interleave_0 = const()[name = string("input_277_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_277_cast_fp16 = concat(axis = var_5205, interleave = input_277_interleave_0, values = (linear_21_cast_fp16, var_5211_cast_fp16))[name = string("input_277_cast_fp16")];
	tensor<int32, [1]> normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_5203_to_fp16 = const()[name = string("op_5203_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_5203_to_fp16, x = input_277_cast_fp16)[name = string("normed_273_cast_fp16")];
	tensor<int32, [2]> var_5216_split_sizes_0 = const()[name = string("op_5216_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_5216_axis_0 = const()[name = string("op_5216_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_5216_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_5216_cast_fp16_1 = split(axis = var_5216_axis_0, split_sizes = var_5216_split_sizes_0, x = normed_273_cast_fp16)[name = string("op_5216_cast_fp16")];
	tensor<fp16, [1536]> const_155_to_fp16 = const()[name = string("const_155_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346366400)))];
	tensor<fp16, [1, 1, 1536]> var_5219_cast_fp16 = mul(x = var_5216_cast_fp16_0, y = const_155_to_fp16)[name = string("op_5219_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_157_cast_fp16 = add(x = hidden_states_153_cast_fp16, y = var_5219_cast_fp16)[name = string("hidden_states_157_cast_fp16")];
	tensor<fp16, [1]> layers_10_layer_scalar_to_fp16 = const()[name = string("layers_10_layer_scalar_to_fp16"), val = tensor<fp16, [1]>([0x1.44p-1])];
	tensor<fp16, [1, 1, 1536]> x_281_cast_fp16 = mul(x = hidden_states_157_cast_fp16, y = layers_10_layer_scalar_to_fp16)[name = string("x_281_cast_fp16")];
	int32 var_5227 = const()[name = string("op_5227"), val = int32(-1)];
	fp16 const_156_promoted_to_fp16 = const()[name = string("const_156_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_5233_cast_fp16 = mul(x = x_281_cast_fp16, y = const_156_promoted_to_fp16)[name = string("op_5233_cast_fp16")];
	bool input_279_interleave_0 = const()[name = string("input_279_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_279_cast_fp16 = concat(axis = var_5227, interleave = input_279_interleave_0, values = (x_281_cast_fp16, var_5233_cast_fp16))[name = string("input_279_cast_fp16")];
	tensor<int32, [1]> normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_5225_to_fp16 = const()[name = string("op_5225_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_5225_to_fp16, x = input_279_cast_fp16)[name = string("normed_277_cast_fp16")];
	tensor<int32, [2]> var_5238_split_sizes_0 = const()[name = string("op_5238_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_5238_axis_0 = const()[name = string("op_5238_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_5238_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_5238_cast_fp16_1 = split(axis = var_5238_axis_0, split_sizes = var_5238_split_sizes_0, x = normed_277_cast_fp16)[name = string("op_5238_cast_fp16")];
	tensor<fp16, [1536]> const_157_to_fp16 = const()[name = string("const_157_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346369536)))];
	tensor<fp16, [1, 1, 1536]> var_5241_cast_fp16 = mul(x = var_5238_cast_fp16_0, y = const_157_to_fp16)[name = string("op_5241_cast_fp16")];
	tensor<int32, [3]> var_5249 = const()[name = string("op_5249"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> var_5252_axes_0 = const()[name = string("op_5252_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_5250_cast_fp16 = transpose(perm = var_5249, x = var_5241_cast_fp16)[name = string("transpose_6")];
	tensor<fp16, [1, 1536, 1, 1]> var_5252_cast_fp16 = expand_dims(axes = var_5252_axes_0, x = var_5250_cast_fp16)[name = string("op_5252_cast_fp16")];
	string var_5268_pad_type_0 = const()[name = string("op_5268_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_5268_strides_0 = const()[name = string("op_5268_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_5268_pad_0 = const()[name = string("op_5268_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_5268_dilations_0 = const()[name = string("op_5268_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_5268_groups_0 = const()[name = string("op_5268_groups_0"), val = int32(1)];
	tensor<fp16, [1, 2048, 1, 1]> var_5268 = conv(dilations = var_5268_dilations_0, groups = var_5268_groups_0, pad = var_5268_pad_0, pad_type = var_5268_pad_type_0, strides = var_5268_strides_0, weight = layers_11_self_attn_q_proj_weight_palettized, x = var_5252_cast_fp16)[name = string("op_5268")];
	tensor<int32, [4]> var_5273 = const()[name = string("op_5273"), val = tensor<int32, [4]>([1, 8, 256, 1])];
	tensor<fp16, [1, 8, 256, 1]> var_5274 = reshape(shape = var_5273, x = var_5268)[name = string("op_5274")];
	tensor<int32, [4]> var_5279 = const()[name = string("op_5279"), val = tensor<int32, [4]>([0, 1, 3, 2])];
	tensor<int32, [3]> var_5289 = const()[name = string("op_5289"), val = tensor<int32, [3]>([1, 8, 256])];
	tensor<fp16, [1, 8, 1, 256]> var_5280 = transpose(perm = var_5279, x = var_5274)[name = string("transpose_5")];
	tensor<fp16, [1, 8, 256]> x_285 = reshape(shape = var_5289, x = var_5280)[name = string("x_285")];
	int32 var_5295 = const()[name = string("op_5295"), val = int32(-1)];
	fp16 const_158_promoted_to_fp16 = const()[name = string("const_158_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 256]> var_5301_cast_fp16 = mul(x = x_285, y = const_158_promoted_to_fp16)[name = string("op_5301_cast_fp16")];
	bool input_283_interleave_0 = const()[name = string("input_283_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 512]> input_283_cast_fp16 = concat(axis = var_5295, interleave = input_283_interleave_0, values = (x_285, var_5301_cast_fp16))[name = string("input_283_cast_fp16")];
	tensor<int32, [1]> normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_5293_to_fp16 = const()[name = string("op_5293_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 8, 512]> normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_5293_to_fp16, x = input_283_cast_fp16)[name = string("normed_281_cast_fp16")];
	tensor<int32, [2]> var_5306_split_sizes_0 = const()[name = string("op_5306_split_sizes_0"), val = tensor<int32, [2]>([256, 256])];
	int32 var_5306_axis_0 = const()[name = string("op_5306_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 256]> var_5306_cast_fp16_0, tensor<fp16, [1, 8, 256]> var_5306_cast_fp16_1 = split(axis = var_5306_axis_0, split_sizes = var_5306_split_sizes_0, x = normed_281_cast_fp16)[name = string("op_5306_cast_fp16")];
	tensor<fp16, [1, 8, 256]> var_5309_cast_fp16 = mul(x = var_5306_cast_fp16_0, y = const_21_to_fp16)[name = string("op_5309_cast_fp16")];
	tensor<int32, [4]> var_5315 = const()[name = string("op_5315"), val = tensor<int32, [4]>([1, 8, 1, 256])];
	tensor<fp16, [1, 8, 1, 256]> q_75 = reshape(shape = var_5315, x = var_5309_cast_fp16)[name = string("q_75")];
	tensor<fp16, [1, 8, 1, 256]> var_5317_cast_fp16 = mul(x = q_75, y = cos_s)[name = string("op_5317_cast_fp16")];
	tensor<int32, [2]> var_5318_split_sizes_0 = const()[name = string("op_5318_split_sizes_0"), val = tensor<int32, [2]>([128, 128])];
	int32 var_5318_axis_0 = const()[name = string("op_5318_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 128]> var_5318_0, tensor<fp16, [1, 8, 1, 128]> var_5318_1 = split(axis = var_5318_axis_0, split_sizes = var_5318_split_sizes_0, x = q_75)[name = string("op_5318")];
	fp16 const_160_promoted = const()[name = string("const_160_promoted"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 8, 1, 128]> var_5320 = mul(x = var_5318_1, y = const_160_promoted)[name = string("op_5320")];
	int32 var_5322 = const()[name = string("op_5322"), val = int32(-1)];
	bool var_5323_interleave_0 = const()[name = string("op_5323_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_5323 = concat(axis = var_5322, interleave = var_5323_interleave_0, values = (var_5320, var_5318_0))[name = string("op_5323")];
	tensor<fp16, [1, 8, 1, 256]> var_5324_cast_fp16 = mul(x = var_5323, y = sin_s)[name = string("op_5324_cast_fp16")];
	tensor<fp16, [1, 8, 1, 256]> q_cast_fp16 = add(x = var_5317_cast_fp16, y = var_5324_cast_fp16)[name = string("q_cast_fp16")];
	bool var_5348_transpose_x_0 = const()[name = string("op_5348_transpose_x_0"), val = bool(false)];
	bool var_5348_transpose_y_0 = const()[name = string("op_5348_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 512]> var_5348_cast_fp16 = matmul(transpose_x = var_5348_transpose_x_0, transpose_y = var_5348_transpose_y_0, x = q_cast_fp16, y = transpose_49_cast_fp16)[name = string("op_5348_cast_fp16")];
	tensor<fp16, [1, 8, 1, 512]> var_5355_cast_fp16 = add(x = var_5348_cast_fp16, y = causal_mask)[name = string("op_5355_cast_fp16")];
	int32 var_5356 = const()[name = string("op_5356"), val = int32(-1)];
	tensor<fp16, [1, 8, 1, 512]> var_5358_cast_fp16 = softmax(axis = var_5356, x = var_5355_cast_fp16)[name = string("op_5358_cast_fp16")];
	bool var_5374_transpose_x_0 = const()[name = string("op_5374_transpose_x_0"), val = bool(false)];
	bool var_5374_transpose_y_0 = const()[name = string("op_5374_transpose_y_0"), val = bool(false)];
	tensor<fp16, [1, 8, 1, 256]> var_5374_cast_fp16 = matmul(transpose_x = var_5374_transpose_x_0, transpose_y = var_5374_transpose_y_0, x = var_5358_cast_fp16, y = Ve_3_cast_fp16)[name = string("op_5374_cast_fp16")];
	tensor<int32, [4]> var_5384 = const()[name = string("op_5384"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_5391 = const()[name = string("op_5391"), val = tensor<int32, [3]>([1, 1, -1])];
	tensor<fp16, [1, 1, 8, 256]> var_5385 = transpose(perm = var_5384, x = var_5374_cast_fp16)[name = string("transpose_4")];
	tensor<fp16, [1, 1, 2048]> var_5392 = reshape(shape = var_5391, x = var_5385)[name = string("op_5392")];
	tensor<int32, [3]> var_5396 = const()[name = string("op_5396"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1536, 2048, 1]> squeeze_11_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346372672))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347945600))))[name = string("squeeze_11_palettized")];
	string var_5412_pad_type_0 = const()[name = string("op_5412_pad_type_0"), val = string("valid")];
	int32 var_5412_groups_0 = const()[name = string("op_5412_groups_0"), val = int32(1)];
	tensor<int32, [1]> var_5412_strides_0 = const()[name = string("op_5412_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> var_5412_pad_0 = const()[name = string("op_5412_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> var_5412_dilations_0 = const()[name = string("op_5412_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 2048, 1]> var_5397 = transpose(perm = var_5396, x = var_5392)[name = string("transpose_3")];
	tensor<fp16, [1, 1536, 1]> var_5412 = conv(dilations = var_5412_dilations_0, groups = var_5412_groups_0, pad = var_5412_pad_0, pad_type = var_5412_pad_type_0, strides = var_5412_strides_0, weight = squeeze_11_palettized, x = var_5397)[name = string("op_5412")];
	tensor<int32, [3]> var_5416 = const()[name = string("op_5416"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_5422 = const()[name = string("op_5422"), val = int32(-1)];
	fp16 const_161_promoted_to_fp16 = const()[name = string("const_161_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_289 = transpose(perm = var_5416, x = var_5412)[name = string("transpose_2")];
	tensor<fp16, [1, 1, 1536]> var_5428_cast_fp16 = mul(x = x_289, y = const_161_promoted_to_fp16)[name = string("op_5428_cast_fp16")];
	bool input_287_interleave_0 = const()[name = string("input_287_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_287_cast_fp16 = concat(axis = var_5422, interleave = input_287_interleave_0, values = (x_289, var_5428_cast_fp16))[name = string("input_287_cast_fp16")];
	tensor<int32, [1]> normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_5420_to_fp16 = const()[name = string("op_5420_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_285_cast_fp16 = layer_norm(axes = normed_285_axes_0, epsilon = var_5420_to_fp16, x = input_287_cast_fp16)[name = string("normed_285_cast_fp16")];
	tensor<int32, [2]> var_5433_split_sizes_0 = const()[name = string("op_5433_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_5433_axis_0 = const()[name = string("op_5433_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_5433_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_5433_cast_fp16_1 = split(axis = var_5433_axis_0, split_sizes = var_5433_split_sizes_0, x = normed_285_cast_fp16)[name = string("op_5433_cast_fp16")];
	tensor<fp16, [1536]> const_162_to_fp16 = const()[name = string("const_162_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347947200)))];
	tensor<fp16, [1, 1, 1536]> var_5436_cast_fp16 = mul(x = var_5433_cast_fp16_0, y = const_162_to_fp16)[name = string("op_5436_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> x_293_cast_fp16 = add(x = x_281_cast_fp16, y = var_5436_cast_fp16)[name = string("x_293_cast_fp16")];
	int32 var_5443 = const()[name = string("op_5443"), val = int32(-1)];
	fp16 const_163_promoted_to_fp16 = const()[name = string("const_163_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_5449_cast_fp16 = mul(x = x_293_cast_fp16, y = const_163_promoted_to_fp16)[name = string("op_5449_cast_fp16")];
	bool input_289_interleave_0 = const()[name = string("input_289_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_289_cast_fp16 = concat(axis = var_5443, interleave = input_289_interleave_0, values = (x_293_cast_fp16, var_5449_cast_fp16))[name = string("input_289_cast_fp16")];
	tensor<int32, [1]> normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_5441_to_fp16 = const()[name = string("op_5441_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_5441_to_fp16, x = input_289_cast_fp16)[name = string("normed_289_cast_fp16")];
	tensor<int32, [2]> var_5454_split_sizes_0 = const()[name = string("op_5454_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_5454_axis_0 = const()[name = string("op_5454_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_5454_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_5454_cast_fp16_1 = split(axis = var_5454_axis_0, split_sizes = var_5454_split_sizes_0, x = normed_289_cast_fp16)[name = string("op_5454_cast_fp16")];
	tensor<fp16, [1536]> const_164_to_fp16 = const()[name = string("const_164_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347950336)))];
	tensor<fp16, [1, 1, 1536]> var_5457_cast_fp16 = mul(x = var_5454_cast_fp16_0, y = const_164_to_fp16)[name = string("op_5457_cast_fp16")];
	tensor<int32, [3]> var_5470 = const()[name = string("op_5470"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [1]> input_291_axes_0 = const()[name = string("input_291_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_5471 = transpose(perm = var_5470, x = var_5457_cast_fp16)[name = string("transpose_1")];
	tensor<fp16, [1, 1536, 1, 1]> input_291 = expand_dims(axes = input_291_axes_0, x = var_5471)[name = string("input_291")];
	string var_5484_pad_type_0 = const()[name = string("op_5484_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_5484_strides_0 = const()[name = string("op_5484_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_5484_pad_0 = const()[name = string("op_5484_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_5484_dilations_0 = const()[name = string("op_5484_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_5484_groups_0 = const()[name = string("op_5484_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_5484 = conv(dilations = var_5484_dilations_0, groups = var_5484_groups_0, pad = var_5484_pad_0, pad_type = var_5484_pad_type_0, strides = var_5484_strides_0, weight = layers_11_mlp_gate_proj_weight_palettized, x = input_291)[name = string("op_5484")];
	string var_5486_mode_0 = const()[name = string("op_5486_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 12288, 1, 1]> var_5486 = gelu(mode = var_5486_mode_0, x = var_5484)[name = string("op_5486")];
	string var_5497_pad_type_0 = const()[name = string("op_5497_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_5497_strides_0 = const()[name = string("op_5497_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_5497_pad_0 = const()[name = string("op_5497_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_5497_dilations_0 = const()[name = string("op_5497_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_5497_groups_0 = const()[name = string("op_5497_groups_0"), val = int32(1)];
	tensor<fp16, [1, 12288, 1, 1]> var_5497 = conv(dilations = var_5497_dilations_0, groups = var_5497_groups_0, pad = var_5497_pad_0, pad_type = var_5497_pad_type_0, strides = var_5497_strides_0, weight = layers_11_mlp_up_proj_weight_palettized, x = input_291)[name = string("op_5497")];
	tensor<fp16, [1, 12288, 1, 1]> input_293 = mul(x = var_5486, y = var_5497)[name = string("input_293")];
	string var_5509_pad_type_0 = const()[name = string("op_5509_pad_type_0"), val = string("valid")];
	tensor<int32, [2]> var_5509_strides_0 = const()[name = string("op_5509_strides_0"), val = tensor<int32, [2]>([1, 1])];
	tensor<int32, [4]> var_5509_pad_0 = const()[name = string("op_5509_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
	tensor<int32, [2]> var_5509_dilations_0 = const()[name = string("op_5509_dilations_0"), val = tensor<int32, [2]>([1, 1])];
	int32 var_5509_groups_0 = const()[name = string("op_5509_groups_0"), val = int32(1)];
	tensor<fp16, [1, 1536, 1, 1]> var_5509 = conv(dilations = var_5509_dilations_0, groups = var_5509_groups_0, pad = var_5509_pad_0, pad_type = var_5509_pad_type_0, strides = var_5509_strides_0, weight = layers_11_mlp_down_proj_weight_palettized, x = input_293)[name = string("op_5509")];
	tensor<int32, [1]> var_5511_axes_0 = const()[name = string("op_5511_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<fp16, [1, 1536, 1]> var_5511 = squeeze(axes = var_5511_axes_0, x = var_5509)[name = string("op_5511")];
	tensor<int32, [3]> var_5515 = const()[name = string("op_5515"), val = tensor<int32, [3]>([0, 2, 1])];
	int32 var_5521 = const()[name = string("op_5521"), val = int32(-1)];
	fp16 const_165_promoted_to_fp16 = const()[name = string("const_165_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> x_297 = transpose(perm = var_5515, x = var_5511)[name = string("transpose_0")];
	tensor<fp16, [1, 1, 1536]> var_5527_cast_fp16 = mul(x = x_297, y = const_165_promoted_to_fp16)[name = string("op_5527_cast_fp16")];
	bool input_295_interleave_0 = const()[name = string("input_295_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_295_cast_fp16 = concat(axis = var_5521, interleave = input_295_interleave_0, values = (x_297, var_5527_cast_fp16))[name = string("input_295_cast_fp16")];
	tensor<int32, [1]> normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_5519_to_fp16 = const()[name = string("op_5519_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_5519_to_fp16, x = input_295_cast_fp16)[name = string("normed_293_cast_fp16")];
	tensor<int32, [2]> var_5532_split_sizes_0 = const()[name = string("op_5532_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_5532_axis_0 = const()[name = string("op_5532_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_5532_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_5532_cast_fp16_1 = split(axis = var_5532_axis_0, split_sizes = var_5532_split_sizes_0, x = normed_293_cast_fp16)[name = string("op_5532_cast_fp16")];
	tensor<fp16, [1536]> const_166_to_fp16 = const()[name = string("const_166_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347953472)))];
	tensor<fp16, [1, 1, 1536]> var_5535_cast_fp16 = mul(x = var_5532_cast_fp16_0, y = const_166_to_fp16)[name = string("op_5535_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_167_cast_fp16 = add(x = x_293_cast_fp16, y = var_5535_cast_fp16)[name = string("hidden_states_167_cast_fp16")];
	tensor<fp16, [1, 1, 256]> var_5546 = linear(bias = linear_0_bias_0, weight = layers_11_per_layer_input_gate_weight_palettized, x = hidden_states_167_cast_fp16)[name = string("linear_22")];
	string gated_mode_0 = const()[name = string("gated_mode_0"), val = string("TANH_APPROXIMATION")];
	tensor<fp16, [1, 1, 256]> gated = gelu(mode = gated_mode_0, x = var_5546)[name = string("gated")];
	tensor<int32, [3]> var_5563_begin_0 = const()[name = string("op_5563_begin_0"), val = tensor<int32, [3]>([0, 0, 5888])];
	tensor<int32, [3]> var_5563_end_0 = const()[name = string("op_5563_end_0"), val = tensor<int32, [3]>([1, 1, 6144])];
	tensor<bool, [3]> var_5563_end_mask_0 = const()[name = string("op_5563_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
	tensor<fp16, [1, 1, 256]> var_5563_cast_fp16 = slice_by_index(begin = var_5563_begin_0, end = var_5563_end_0, end_mask = var_5563_end_mask_0, x = per_layer_combined)[name = string("op_5563_cast_fp16")];
	tensor<fp16, [1, 1, 256]> input_299_cast_fp16 = mul(x = gated, y = var_5563_cast_fp16)[name = string("input_299_cast_fp16")];
	tensor<fp16, [1536, 256]> layers_11_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347956608))), lut = tensor<fp16, [48, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348153280))))[name = string("layers_11_per_layer_projection_weight_promoted_to_fp16_palettized")];
	tensor<fp16, [1, 1, 1536]> linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_299_cast_fp16)[name = string("linear_23_cast_fp16")];
	int32 var_5572 = const()[name = string("op_5572"), val = int32(-1)];
	fp16 const_167_promoted_to_fp16 = const()[name = string("const_167_promoted_to_fp16"), val = fp16(-0x1p+0)];
	tensor<fp16, [1, 1, 1536]> var_5578_cast_fp16 = mul(x = linear_23_cast_fp16, y = const_167_promoted_to_fp16)[name = string("op_5578_cast_fp16")];
	bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)];
	tensor<fp16, [1, 1, 3072]> input_cast_fp16 = concat(axis = var_5572, interleave = input_interleave_0, values = (linear_23_cast_fp16, var_5578_cast_fp16))[name = string("input_cast_fp16")];
	tensor<int32, [1]> normed_297_axes_0 = const()[name = string("normed_297_axes_0"), val = tensor<int32, [1]>([-1])];
	fp16 var_5570_to_fp16 = const()[name = string("op_5570_to_fp16"), val = fp16(0x1.1p-20)];
	tensor<fp16, [1, 1, 3072]> normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_5570_to_fp16, x = input_cast_fp16)[name = string("normed_297_cast_fp16")];
	tensor<int32, [2]> var_5583_split_sizes_0 = const()[name = string("op_5583_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])];
	int32 var_5583_axis_0 = const()[name = string("op_5583_axis_0"), val = int32(-1)];
	tensor<fp16, [1, 1, 1536]> var_5583_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_5583_cast_fp16_1 = split(axis = var_5583_axis_0, split_sizes = var_5583_split_sizes_0, x = normed_297_cast_fp16)[name = string("op_5583_cast_fp16")];
	tensor<fp16, [1536]> const_168_to_fp16 = const()[name = string("const_168_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348154880)))];
	tensor<fp16, [1, 1, 1536]> var_5586_cast_fp16 = mul(x = var_5583_cast_fp16_0, y = const_168_to_fp16)[name = string("op_5586_cast_fp16")];
	tensor<fp16, [1, 1, 1536]> hidden_states_cast_fp16 = add(x = hidden_states_167_cast_fp16, y = var_5586_cast_fp16)[name = string("hidden_states_cast_fp16")];
	tensor<fp16, [1]> layers_11_layer_scalar_to_fp16 = const()[name = string("layers_11_layer_scalar_to_fp16"), val = tensor<fp16, [1]>([0x1.bap-2])];
	tensor<fp16, [1, 1, 1536]> hidden_states_out = mul(x = hidden_states_cast_fp16, y = layers_11_layer_scalar_to_fp16)[name = string("op_5590_cast_fp16")];
	} -> (hidden_states_out, kv13_k, kv13_v, kv14_k, kv14_v);
	}