| program(1.3) |
| [buildInfo = dict<string, string>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})] |
| { |
| func main<ios18>(tensor<fp16, [1, 1, 1, 2048]> causal_mask_full, tensor<fp16, [1, 1, 1, 512]> causal_mask_sliding, tensor<fp16, [1, 1, 1, 512]> cos_f, tensor<fp16, [1, 1, 1, 256]> cos_s, tensor<fp16, [1, 1, 1536]> hidden_states, tensor<fp16, [1, 1, 512, 256]> kv13_k, tensor<fp16, [1, 1, 512, 256]> kv13_v, tensor<fp16, [1, 1, 2048, 512]> kv14_k, tensor<fp16, [1, 1, 2048, 512]> kv14_v, tensor<fp16, [1, 1, 8960]> per_layer_combined, tensor<fp16, [1, 1, 1, 512]> sin_f, tensor<fp16, [1, 1, 1, 256]> sin_s, tensor<fp16, [1, 1, 2048, 1]> update_mask) { |
| tensor<fp16, [2048, 1536, 1, 1]> layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1572992))))[name = string("layers_0_self_attn_q_proj_weight_palettized")]; |
| tensor<fp16, [256]> layers_0_self_attn_q_norm_weight = const()[name = string("layers_0_self_attn_q_norm_weight"), val = tensor<fp16, [256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1575104)))]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1575680))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11012928))))[name = string("layers_0_mlp_gate_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11025280))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20462528))))[name = string("layers_0_mlp_up_proj_weight_palettized")]; |
| tensor<fp16, [1536, 12288, 1, 1]> layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20474880))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29912128))))[name = string("layers_0_mlp_down_proj_weight_palettized")]; |
| tensor<fp16, [1536]> layers_0_post_feedforward_layernorm_weight = const()[name = string("layers_0_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29913728)))]; |
| tensor<fp16, [256, 1536, 1, 1]> layers_0_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29916864))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30113536))))[name = string("layers_0_per_layer_input_gate_weight_palettized")]; |
| tensor<fp16, [2048, 1536, 1, 1]> layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30113856))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31686784))))[name = string("layers_1_self_attn_q_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31688896))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41126144))))[name = string("layers_1_mlp_gate_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41138496))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50575744))))[name = string("layers_1_mlp_up_proj_weight_palettized")]; |
| tensor<fp16, [1536, 12288, 1, 1]> layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50588096))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60025344))))[name = string("layers_1_mlp_down_proj_weight_palettized")]; |
| tensor<fp16, [1536]> layers_1_post_feedforward_layernorm_weight = const()[name = string("layers_1_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60026944)))]; |
| tensor<fp16, [256, 1536, 1, 1]> layers_1_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60030080))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60226752))))[name = string("layers_1_per_layer_input_gate_weight_palettized")]; |
| tensor<fp16, [2048, 1536, 1, 1]> layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60227072))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61800000))))[name = string("layers_2_self_attn_q_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61802112))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71239360))))[name = string("layers_2_mlp_gate_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71251712))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80688960))))[name = string("layers_2_mlp_up_proj_weight_palettized")]; |
| tensor<fp16, [1536, 12288, 1, 1]> layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80701312))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90138560))))[name = string("layers_2_mlp_down_proj_weight_palettized")]; |
| tensor<fp16, [1536]> layers_2_post_feedforward_layernorm_weight = const()[name = string("layers_2_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90140160)))]; |
| tensor<fp16, [256, 1536, 1, 1]> layers_2_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90143296))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90339968))))[name = string("layers_2_per_layer_input_gate_weight_palettized")]; |
| tensor<fp16, [2048, 1536, 1, 1]> layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90340288))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91913216))))[name = string("layers_3_self_attn_q_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91915328))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101352576))))[name = string("layers_3_mlp_gate_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101364928))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110802176))))[name = string("layers_3_mlp_up_proj_weight_palettized")]; |
| tensor<fp16, [1536, 12288, 1, 1]> layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110814528))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120251776))))[name = string("layers_3_mlp_down_proj_weight_palettized")]; |
| tensor<fp16, [1536]> layers_3_post_feedforward_layernorm_weight = const()[name = string("layers_3_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120253376)))]; |
| tensor<fp16, [256, 1536, 1, 1]> layers_3_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120256512))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120453184))))[name = string("layers_3_per_layer_input_gate_weight_palettized")]; |
| tensor<fp16, [4096, 1536, 1, 1]> layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [4096, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120453504))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123599296))))[name = string("layers_4_self_attn_q_proj_weight_palettized")]; |
| tensor<fp16, [512]> layers_4_self_attn_q_norm_weight = const()[name = string("layers_4_self_attn_q_norm_weight"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123603456)))]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123604544))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133041792))))[name = string("layers_4_mlp_gate_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133054144))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142491392))))[name = string("layers_4_mlp_up_proj_weight_palettized")]; |
| tensor<fp16, [1536, 12288, 1, 1]> layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142503744))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151940992))))[name = string("layers_4_mlp_down_proj_weight_palettized")]; |
| tensor<fp16, [1536]> layers_4_post_feedforward_layernorm_weight = const()[name = string("layers_4_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151942592)))]; |
| tensor<fp16, [256, 1536, 1, 1]> layers_4_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151945728))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152142400))))[name = string("layers_4_per_layer_input_gate_weight_palettized")]; |
| tensor<fp16, [2048, 1536, 1, 1]> layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152142720))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153715648))))[name = string("layers_5_self_attn_q_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153717760))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163155008))))[name = string("layers_5_mlp_gate_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163167360))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172604608))))[name = string("layers_5_mlp_up_proj_weight_palettized")]; |
| tensor<fp16, [1536, 12288, 1, 1]> layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172616960))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182054208))))[name = string("layers_5_mlp_down_proj_weight_palettized")]; |
| tensor<fp16, [1536]> layers_5_post_feedforward_layernorm_weight = const()[name = string("layers_5_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182055808)))]; |
| tensor<fp16, [256, 1536, 1, 1]> layers_5_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182058944))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182255616))))[name = string("layers_5_per_layer_input_gate_weight_palettized")]; |
| tensor<fp16, [2048, 1536, 1, 1]> layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182255936))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183828864))))[name = string("layers_6_self_attn_q_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183830976))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193268224))))[name = string("layers_6_mlp_gate_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193280576))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202717824))))[name = string("layers_6_mlp_up_proj_weight_palettized")]; |
| tensor<fp16, [1536, 12288, 1, 1]> layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202730176))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212167424))))[name = string("layers_6_mlp_down_proj_weight_palettized")]; |
| tensor<fp16, [1536]> layers_6_post_feedforward_layernorm_weight = const()[name = string("layers_6_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212169024)))]; |
| tensor<fp16, [256, 1536, 1, 1]> layers_6_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212172160))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212368832))))[name = string("layers_6_per_layer_input_gate_weight_palettized")]; |
| tensor<fp16, [2048, 1536, 1, 1]> layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212369152))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213942080))))[name = string("layers_7_self_attn_q_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213944192))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223381440))))[name = string("layers_7_mlp_gate_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223393792))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232831040))))[name = string("layers_7_mlp_up_proj_weight_palettized")]; |
| tensor<fp16, [1536, 12288, 1, 1]> layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232843392))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242280640))))[name = string("layers_7_mlp_down_proj_weight_palettized")]; |
| tensor<fp16, [1536]> layers_7_post_feedforward_layernorm_weight = const()[name = string("layers_7_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242282240)))]; |
| tensor<fp16, [256, 1536, 1, 1]> layers_7_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242285376))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242482048))))[name = string("layers_7_per_layer_input_gate_weight_palettized")]; |
| tensor<fp16, [2048, 1536, 1, 1]> layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [2048, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242482368))), lut = tensor<fp16, [64, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244055296))))[name = string("layers_8_self_attn_q_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244057408))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253494656))))[name = string("layers_8_mlp_gate_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253507008))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262944256))))[name = string("layers_8_mlp_up_proj_weight_palettized")]; |
| tensor<fp16, [1536, 12288, 1, 1]> layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262956608))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272393856))))[name = string("layers_8_mlp_down_proj_weight_palettized")]; |
| tensor<fp16, [1536]> layers_8_post_feedforward_layernorm_weight = const()[name = string("layers_8_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272395456)))]; |
| tensor<fp16, [256, 1536, 1, 1]> layers_8_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272398592))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272595264))))[name = string("layers_8_per_layer_input_gate_weight_palettized")]; |
| tensor<fp16, [4096, 1536, 1, 1]> layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [4096, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272595584))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275741376))))[name = string("layers_9_self_attn_q_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275745536))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285182784))))[name = string("layers_9_mlp_gate_proj_weight_palettized")]; |
| tensor<fp16, [12288, 1536, 1, 1]> layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [12288, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285195136))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294632384))))[name = string("layers_9_mlp_up_proj_weight_palettized")]; |
| tensor<fp16, [1536, 12288, 1, 1]> layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 12288, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294644736))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304081984))))[name = string("layers_9_mlp_down_proj_weight_palettized")]; |
| tensor<fp16, [1536]> layers_9_post_feedforward_layernorm_weight = const()[name = string("layers_9_post_feedforward_layernorm_weight"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304083584)))]; |
| tensor<fp16, [256, 1536, 1, 1]> layers_9_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [256, 1536, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304086720))), lut = tensor<fp16, [8, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304283392))))[name = string("layers_9_per_layer_input_gate_weight_palettized")]; |
| int32 var_500 = const()[name = string("op_500"), val = int32(-1)]; |
| fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_502_cast_fp16 = mul(x = hidden_states, y = const_0_promoted_to_fp16)[name = string("op_502_cast_fp16")]; |
| bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_1_cast_fp16 = concat(axis = var_500, interleave = input_1_interleave_0, values = (hidden_states, var_502_cast_fp16))[name = string("input_1_cast_fp16")]; |
| tensor<int32, [1]> normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_497_to_fp16 = const()[name = string("op_497_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_497_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; |
| tensor<int32, [2]> var_507_split_sizes_0 = const()[name = string("op_507_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_507_axis_0 = const()[name = string("op_507_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_507_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_507_cast_fp16_1 = split(axis = var_507_axis_0, split_sizes = var_507_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_507_cast_fp16")]; |
| tensor<fp16, [1536]> layers_0_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304283712)))]; |
| tensor<fp16, [1, 1, 1536]> h_1_cast_fp16 = mul(x = var_507_cast_fp16_0, y = layers_0_input_layernorm_weight_promoted_to_fp16)[name = string("h_1_cast_fp16")]; |
| tensor<int32, [3]> var_513 = const()[name = string("op_513"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> var_516_axes_0 = const()[name = string("op_516_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_514_cast_fp16 = transpose(perm = var_513, x = h_1_cast_fp16)[name = string("transpose_113")]; |
| tensor<fp16, [1, 1536, 1, 1]> var_516_cast_fp16 = expand_dims(axes = var_516_axes_0, x = var_514_cast_fp16)[name = string("op_516_cast_fp16")]; |
| string var_532_pad_type_0 = const()[name = string("op_532_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> var_532_strides_0 = const()[name = string("op_532_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> var_532_pad_0 = const()[name = string("op_532_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> var_532_dilations_0 = const()[name = string("op_532_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 var_532_groups_0 = const()[name = string("op_532_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 2048, 1, 1]> var_532 = conv(dilations = var_532_dilations_0, groups = var_532_groups_0, pad = var_532_pad_0, pad_type = var_532_pad_type_0, strides = var_532_strides_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = var_516_cast_fp16)[name = string("op_532")]; |
| tensor<int32, [4]> var_537 = const()[name = string("op_537"), val = tensor<int32, [4]>([1, 8, 256, 1])]; |
| tensor<fp16, [1, 8, 256, 1]> var_538 = reshape(shape = var_537, x = var_532)[name = string("op_538")]; |
| tensor<int32, [4]> var_543 = const()[name = string("op_543"), val = tensor<int32, [4]>([0, 1, 3, 2])]; |
| tensor<int32, [3]> var_553 = const()[name = string("op_553"), val = tensor<int32, [3]>([1, 8, 256])]; |
| tensor<fp16, [1, 8, 1, 256]> var_544 = transpose(perm = var_543, x = var_538)[name = string("transpose_112")]; |
| tensor<fp16, [1, 8, 256]> x_1 = reshape(shape = var_553, x = var_544)[name = string("x_1")]; |
| int32 var_559 = const()[name = string("op_559"), val = int32(-1)]; |
| fp16 const_1_promoted = const()[name = string("const_1_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 256]> var_561 = mul(x = x_1, y = const_1_promoted)[name = string("op_561")]; |
| bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 512]> input_5 = concat(axis = var_559, interleave = input_5_interleave_0, values = (x_1, var_561))[name = string("input_5")]; |
| tensor<int32, [1]> normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_556_to_fp16 = const()[name = string("op_556_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 8, 512]> normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_556_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; |
| tensor<int32, [2]> var_566_split_sizes_0 = const()[name = string("op_566_split_sizes_0"), val = tensor<int32, [2]>([256, 256])]; |
| int32 var_566_axis_0 = const()[name = string("op_566_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 256]> var_566_0, tensor<fp16, [1, 8, 256]> var_566_1 = split(axis = var_566_axis_0, split_sizes = var_566_split_sizes_0, x = normed_5_cast_fp16)[name = string("op_566")]; |
| tensor<fp16, [1, 8, 256]> var_568 = mul(x = var_566_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_568")]; |
| tensor<int32, [4]> var_573 = const()[name = string("op_573"), val = tensor<int32, [4]>([1, 8, 1, 256])]; |
| tensor<fp16, [1, 8, 1, 256]> q_3 = reshape(shape = var_573, x = var_568)[name = string("q_3")]; |
| tensor<fp16, [1, 8, 1, 256]> var_575_cast_fp16 = mul(x = q_3, y = cos_s)[name = string("op_575_cast_fp16")]; |
| tensor<int32, [2]> var_576_split_sizes_0 = const()[name = string("op_576_split_sizes_0"), val = tensor<int32, [2]>([128, 128])]; |
| int32 var_576_axis_0 = const()[name = string("op_576_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 1, 128]> var_576_0, tensor<fp16, [1, 8, 1, 128]> var_576_1 = split(axis = var_576_axis_0, split_sizes = var_576_split_sizes_0, x = q_3)[name = string("op_576")]; |
| fp16 const_2_promoted = const()[name = string("const_2_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 1, 128]> var_578 = mul(x = var_576_1, y = const_2_promoted)[name = string("op_578")]; |
| int32 var_580 = const()[name = string("op_580"), val = int32(-1)]; |
| bool var_581_interleave_0 = const()[name = string("op_581_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 256]> var_581 = concat(axis = var_580, interleave = var_581_interleave_0, values = (var_578, var_576_0))[name = string("op_581")]; |
| tensor<fp16, [1, 8, 1, 256]> var_582_cast_fp16 = mul(x = var_581, y = sin_s)[name = string("op_582_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 256]> q_5_cast_fp16 = add(x = var_575_cast_fp16, y = var_582_cast_fp16)[name = string("q_5_cast_fp16")]; |
| tensor<int32, [4]> transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])]; |
| tensor<int32, [4]> tile_0_reps_0 = const()[name = string("tile_0_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])]; |
| tensor<fp16, [1, 1, 512, 256]> transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = kv13_k)[name = string("transpose_111")]; |
| tensor<fp16, [8, 1, 512, 256]> tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = string("tile_0_cast_fp16")]; |
| tensor<int32, [5]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [5]>([8, 1, 1, 512, 256])]; |
| tensor<fp16, [8, 1, 1, 512, 256]> reshape_0_cast_fp16 = reshape(shape = concat_0, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")]; |
| tensor<int32, [5]> transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])]; |
| tensor<int32, [4]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [4]>([-1, 1, 512, 256])]; |
| tensor<fp16, [1, 8, 1, 512, 256]> transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_110")]; |
| tensor<fp16, [8, 1, 512, 256]> reshape_1_cast_fp16 = reshape(shape = concat_1, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")]; |
| tensor<int32, [4]> transpose_40_perm_0 = const()[name = string("transpose_40_perm_0"), val = tensor<int32, [4]>([1, 0, -1, -2])]; |
| tensor<int32, [4]> transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])]; |
| tensor<int32, [4]> tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])]; |
| tensor<fp16, [1, 1, 512, 256]> transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = kv13_v)[name = string("transpose_109")]; |
| tensor<fp16, [8, 1, 512, 256]> tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")]; |
| tensor<int32, [5]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [5]>([8, 1, 1, 512, 256])]; |
| tensor<fp16, [8, 1, 1, 512, 256]> reshape_2_cast_fp16 = reshape(shape = concat_2, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")]; |
| tensor<int32, [5]> transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])]; |
| tensor<int32, [4]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [4]>([-1, 1, 512, 256])]; |
| tensor<fp16, [1, 8, 1, 512, 256]> transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_108")]; |
| tensor<fp16, [8, 1, 512, 256]> reshape_3_cast_fp16 = reshape(shape = concat_3, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")]; |
| tensor<int32, [4]> V_expanded_1_perm_0 = const()[name = string("V_expanded_1_perm_0"), val = tensor<int32, [4]>([1, 0, -2, -1])]; |
| bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; |
| bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 256, 512]> transpose_40_cast_fp16 = transpose(perm = transpose_40_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_107")]; |
| tensor<fp16, [1, 8, 1, 512]> attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = q_5_cast_fp16, y = transpose_40_cast_fp16)[name = string("attn_weights_1_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 512]> x_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask_sliding)[name = string("x_3_cast_fp16")]; |
| tensor<int32, [1]> reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> reduce_max_0 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_3_cast_fp16)[name = string("reduce_max_0")]; |
| tensor<fp16, [1, 8, 1, 512]> var_614 = sub(x = x_3_cast_fp16, y = reduce_max_0)[name = string("op_614")]; |
| tensor<fp16, [1, 8, 1, 512]> var_620 = exp(x = var_614)[name = string("op_620")]; |
| tensor<int32, [1]> var_630_axes_0 = const()[name = string("op_630_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool var_630_keep_dims_0 = const()[name = string("op_630_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> var_630 = reduce_sum(axes = var_630_axes_0, keep_dims = var_630_keep_dims_0, x = var_620)[name = string("op_630")]; |
| tensor<fp16, [1, 8, 1, 512]> var_636_cast_fp16 = real_div(x = var_620, y = var_630)[name = string("op_636_cast_fp16")]; |
| bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; |
| bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 512, 256]> V_expanded_1_cast_fp16 = transpose(perm = V_expanded_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_106")]; |
| tensor<fp16, [1, 8, 1, 256]> attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = var_636_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_1_cast_fp16")]; |
| tensor<int32, [4]> var_647 = const()[name = string("op_647"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_654 = const()[name = string("op_654"), val = tensor<int32, [3]>([1, 1, -1])]; |
| tensor<fp16, [1, 1, 8, 256]> var_648_cast_fp16 = transpose(perm = var_647, x = attn_output_1_cast_fp16)[name = string("transpose_105")]; |
| tensor<fp16, [1, 1, 2048]> attn_output_3_cast_fp16 = reshape(shape = var_654, x = var_648_cast_fp16)[name = string("attn_output_3_cast_fp16")]; |
| tensor<int32, [3]> var_659 = const()[name = string("op_659"), val = tensor<int32, [3]>([0, 2, 1])]; |
| string var_675_pad_type_0 = const()[name = string("op_675_pad_type_0"), val = string("valid")]; |
| int32 var_675_groups_0 = const()[name = string("op_675_groups_0"), val = int32(1)]; |
| tensor<int32, [1]> var_675_strides_0 = const()[name = string("op_675_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> var_675_pad_0 = const()[name = string("op_675_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> var_675_dilations_0 = const()[name = string("op_675_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<fp16, [1536, 2048, 1]> squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304286848))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305859776))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; |
| tensor<fp16, [1, 2048, 1]> var_660_cast_fp16 = transpose(perm = var_659, x = attn_output_3_cast_fp16)[name = string("transpose_104")]; |
| tensor<fp16, [1, 1536, 1]> var_675_cast_fp16 = conv(dilations = var_675_dilations_0, groups = var_675_groups_0, pad = var_675_pad_0, pad_type = var_675_pad_type_0, strides = var_675_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_660_cast_fp16)[name = string("op_675_cast_fp16")]; |
| tensor<int32, [3]> var_679 = const()[name = string("op_679"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_685 = const()[name = string("op_685"), val = int32(-1)]; |
| fp16 const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_7_cast_fp16 = transpose(perm = var_679, x = var_675_cast_fp16)[name = string("transpose_103")]; |
| tensor<fp16, [1, 1, 1536]> var_687_cast_fp16 = mul(x = x_7_cast_fp16, y = const_3_promoted_to_fp16)[name = string("op_687_cast_fp16")]; |
| bool input_9_interleave_0 = const()[name = string("input_9_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_9_cast_fp16 = concat(axis = var_685, interleave = input_9_interleave_0, values = (x_7_cast_fp16, var_687_cast_fp16))[name = string("input_9_cast_fp16")]; |
| tensor<int32, [1]> normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_682_to_fp16 = const()[name = string("op_682_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_682_to_fp16, x = input_9_cast_fp16)[name = string("normed_9_cast_fp16")]; |
| tensor<int32, [2]> var_692_split_sizes_0 = const()[name = string("op_692_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_692_axis_0 = const()[name = string("op_692_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_692_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_692_cast_fp16_1 = split(axis = var_692_axis_0, split_sizes = var_692_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_692_cast_fp16")]; |
| tensor<fp16, [1536]> layers_0_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305861376)))]; |
| tensor<fp16, [1, 1, 1536]> attn_output_5_cast_fp16 = mul(x = var_692_cast_fp16_0, y = layers_0_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_5_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> x_9_cast_fp16 = add(x = hidden_states, y = attn_output_5_cast_fp16)[name = string("x_9_cast_fp16")]; |
| int32 var_701 = const()[name = string("op_701"), val = int32(-1)]; |
| fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_703_cast_fp16 = mul(x = x_9_cast_fp16, y = const_4_promoted_to_fp16)[name = string("op_703_cast_fp16")]; |
| bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_11_cast_fp16 = concat(axis = var_701, interleave = input_11_interleave_0, values = (x_9_cast_fp16, var_703_cast_fp16))[name = string("input_11_cast_fp16")]; |
| tensor<int32, [1]> normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_698_to_fp16 = const()[name = string("op_698_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_698_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")]; |
| tensor<int32, [2]> var_708_split_sizes_0 = const()[name = string("op_708_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_708_axis_0 = const()[name = string("op_708_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_708_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_708_cast_fp16_1 = split(axis = var_708_axis_0, split_sizes = var_708_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_708_cast_fp16")]; |
| tensor<fp16, [1536]> layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305864512)))]; |
| tensor<fp16, [1, 1, 1536]> h_3_cast_fp16 = mul(x = var_708_cast_fp16_0, y = layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_3_cast_fp16")]; |
| tensor<int32, [3]> var_719 = const()[name = string("op_719"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_720 = transpose(perm = var_719, x = h_3_cast_fp16)[name = string("transpose_102")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_13 = expand_dims(axes = input_13_axes_0, x = var_720)[name = string("input_13")]; |
| string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_13)[name = string("gate_1")]; |
| string up_1_pad_type_0 = const()[name = string("up_1_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> up_1_dilations_0 = const()[name = string("up_1_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> up_1 = conv(dilations = up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = up_1_strides_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_13)[name = string("up_1")]; |
| string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")]; |
| tensor<fp16, [1, 12288, 1, 1]> input_15 = mul(x = gate_3, y = up_1)[name = string("input_15")]; |
| string mlp_out_1_pad_type_0 = const()[name = string("mlp_out_1_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> mlp_out_1_strides_0 = const()[name = string("mlp_out_1_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> mlp_out_1_pad_0 = const()[name = string("mlp_out_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> mlp_out_1_dilations_0 = const()[name = string("mlp_out_1_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 mlp_out_1_groups_0 = const()[name = string("mlp_out_1_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 1536, 1, 1]> mlp_out_1 = conv(dilations = mlp_out_1_dilations_0, groups = mlp_out_1_groups_0, pad = mlp_out_1_pad_0, pad_type = mlp_out_1_pad_type_0, strides = mlp_out_1_strides_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_15)[name = string("mlp_out_1")]; |
| tensor<int32, [1]> var_760_axes_0 = const()[name = string("op_760_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_760 = squeeze(axes = var_760_axes_0, x = mlp_out_1)[name = string("op_760")]; |
| tensor<int32, [3]> var_764 = const()[name = string("op_764"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_770 = const()[name = string("op_770"), val = int32(-1)]; |
| fp16 const_5_promoted = const()[name = string("const_5_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_11 = transpose(perm = var_764, x = var_760)[name = string("transpose_101")]; |
| tensor<fp16, [1, 1, 1536]> var_772 = mul(x = x_11, y = const_5_promoted)[name = string("op_772")]; |
| bool input_17_interleave_0 = const()[name = string("input_17_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_17 = concat(axis = var_770, interleave = input_17_interleave_0, values = (x_11, var_772))[name = string("input_17")]; |
| tensor<int32, [1]> normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_767_to_fp16 = const()[name = string("op_767_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_767_to_fp16, x = input_17)[name = string("normed_17_cast_fp16")]; |
| tensor<int32, [2]> var_777_split_sizes_0 = const()[name = string("op_777_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_777_axis_0 = const()[name = string("op_777_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_777_0, tensor<fp16, [1, 1, 1536]> var_777_1 = split(axis = var_777_axis_0, split_sizes = var_777_split_sizes_0, x = normed_17_cast_fp16)[name = string("op_777")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_3 = mul(x = var_777_0, y = layers_0_post_feedforward_layernorm_weight)[name = string("hidden_states_3")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_5_cast_fp16 = add(x = x_9_cast_fp16, y = hidden_states_3)[name = string("hidden_states_5_cast_fp16")]; |
| tensor<int32, [3]> per_layer_slice_1_begin_0 = const()[name = string("per_layer_slice_1_begin_0"), val = tensor<int32, [3]>([0, 0, 6400])]; |
| tensor<int32, [3]> per_layer_slice_1_end_0 = const()[name = string("per_layer_slice_1_end_0"), val = tensor<int32, [3]>([1, 1, 6656])]; |
| tensor<bool, [3]> per_layer_slice_1_end_mask_0 = const()[name = string("per_layer_slice_1_end_mask_0"), val = tensor<bool, [3]>([true, true, false])]; |
| tensor<fp16, [1, 1, 256]> per_layer_slice_1_cast_fp16 = slice_by_index(begin = per_layer_slice_1_begin_0, end = per_layer_slice_1_end_0, end_mask = per_layer_slice_1_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_1_cast_fp16")]; |
| tensor<int32, [3]> var_805 = const()[name = string("op_805"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_19_axes_0 = const()[name = string("input_19_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_806 = transpose(perm = var_805, x = hidden_states_5_cast_fp16)[name = string("transpose_100")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_19 = expand_dims(axes = input_19_axes_0, x = var_806)[name = string("input_19")]; |
| string gated_1_pad_type_0 = const()[name = string("gated_1_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_1_strides_0 = const()[name = string("gated_1_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_1_pad_0 = const()[name = string("gated_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_1_dilations_0 = const()[name = string("gated_1_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_1_groups_0 = const()[name = string("gated_1_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 256, 1, 1]> gated_1 = conv(dilations = gated_1_dilations_0, groups = gated_1_groups_0, pad = gated_1_pad_0, pad_type = gated_1_pad_type_0, strides = gated_1_strides_0, weight = layers_0_per_layer_input_gate_weight_palettized, x = input_19)[name = string("gated_1")]; |
| string gated_3_mode_0 = const()[name = string("gated_3_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 256, 1, 1]> gated_3 = gelu(mode = gated_3_mode_0, x = gated_1)[name = string("gated_3")]; |
| tensor<int32, [3]> var_825 = const()[name = string("op_825"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> per_layer_slice_conv_1_axes_0 = const()[name = string("per_layer_slice_conv_1_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 256, 1]> var_826_cast_fp16 = transpose(perm = var_825, x = per_layer_slice_1_cast_fp16)[name = string("transpose_99")]; |
| tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_1_cast_fp16 = expand_dims(axes = per_layer_slice_conv_1_axes_0, x = var_826_cast_fp16)[name = string("per_layer_slice_conv_1_cast_fp16")]; |
| tensor<fp16, [1, 256, 1, 1]> input_21_cast_fp16 = mul(x = gated_3, y = per_layer_slice_conv_1_cast_fp16)[name = string("input_21_cast_fp16")]; |
| string gated_5_pad_type_0 = const()[name = string("gated_5_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_5_strides_0 = const()[name = string("gated_5_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_5_pad_0 = const()[name = string("gated_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_5_dilations_0 = const()[name = string("gated_5_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_5_groups_0 = const()[name = string("gated_5_groups_0"), val = int32(1)]; |
| tensor<fp16, [1536, 256, 1, 1]> layers_0_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305867648))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306064320))))[name = string("layers_0_per_layer_projection_weight_promoted_to_fp16_palettized")]; |
| tensor<fp16, [1, 1536, 1, 1]> gated_5_cast_fp16 = conv(dilations = gated_5_dilations_0, groups = gated_5_groups_0, pad = gated_5_pad_0, pad_type = gated_5_pad_type_0, strides = gated_5_strides_0, weight = layers_0_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_21_cast_fp16)[name = string("gated_5_cast_fp16")]; |
| tensor<int32, [1]> var_842_axes_0 = const()[name = string("op_842_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_842_cast_fp16 = squeeze(axes = var_842_axes_0, x = gated_5_cast_fp16)[name = string("op_842_cast_fp16")]; |
| tensor<int32, [3]> var_846 = const()[name = string("op_846"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_852 = const()[name = string("op_852"), val = int32(-1)]; |
| fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_13_cast_fp16 = transpose(perm = var_846, x = var_842_cast_fp16)[name = string("transpose_98")]; |
| tensor<fp16, [1, 1, 1536]> var_854_cast_fp16 = mul(x = x_13_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_854_cast_fp16")]; |
| bool input_23_interleave_0 = const()[name = string("input_23_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_23_cast_fp16 = concat(axis = var_852, interleave = input_23_interleave_0, values = (x_13_cast_fp16, var_854_cast_fp16))[name = string("input_23_cast_fp16")]; |
| tensor<int32, [1]> normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_849_to_fp16 = const()[name = string("op_849_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_849_to_fp16, x = input_23_cast_fp16)[name = string("normed_21_cast_fp16")]; |
| tensor<int32, [2]> var_859_split_sizes_0 = const()[name = string("op_859_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_859_axis_0 = const()[name = string("op_859_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_859_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_859_cast_fp16_1 = split(axis = var_859_axis_0, split_sizes = var_859_split_sizes_0, x = normed_21_cast_fp16)[name = string("op_859_cast_fp16")]; |
| tensor<fp16, [1536]> layers_0_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306065920)))]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_9_cast_fp16 = mul(x = var_859_cast_fp16_0, y = layers_0_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_9_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_11_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; |
| tensor<fp16, [1]> const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.92p-1])]; |
| tensor<fp16, [1, 1, 1536]> x_15_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_7_promoted_to_fp16)[name = string("x_15_cast_fp16")]; |
| int32 var_874 = const()[name = string("op_874"), val = int32(-1)]; |
| fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_876_cast_fp16 = mul(x = x_15_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_876_cast_fp16")]; |
| bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_25_cast_fp16 = concat(axis = var_874, interleave = input_25_interleave_0, values = (x_15_cast_fp16, var_876_cast_fp16))[name = string("input_25_cast_fp16")]; |
| tensor<int32, [1]> normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_871_to_fp16 = const()[name = string("op_871_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_871_to_fp16, x = input_25_cast_fp16)[name = string("normed_25_cast_fp16")]; |
| tensor<int32, [2]> var_881_split_sizes_0 = const()[name = string("op_881_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_881_axis_0 = const()[name = string("op_881_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_881_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_881_cast_fp16_1 = split(axis = var_881_axis_0, split_sizes = var_881_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_881_cast_fp16")]; |
| tensor<fp16, [1536]> layers_1_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306069056)))]; |
| tensor<fp16, [1, 1, 1536]> h_7_cast_fp16 = mul(x = var_881_cast_fp16_0, y = layers_1_input_layernorm_weight_promoted_to_fp16)[name = string("h_7_cast_fp16")]; |
| tensor<int32, [3]> var_887 = const()[name = string("op_887"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> var_890_axes_0 = const()[name = string("op_890_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_888_cast_fp16 = transpose(perm = var_887, x = h_7_cast_fp16)[name = string("transpose_97")]; |
| tensor<fp16, [1, 1536, 1, 1]> var_890_cast_fp16 = expand_dims(axes = var_890_axes_0, x = var_888_cast_fp16)[name = string("op_890_cast_fp16")]; |
| string var_906_pad_type_0 = const()[name = string("op_906_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> var_906_strides_0 = const()[name = string("op_906_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> var_906_pad_0 = const()[name = string("op_906_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> var_906_dilations_0 = const()[name = string("op_906_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 var_906_groups_0 = const()[name = string("op_906_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 2048, 1, 1]> var_906 = conv(dilations = var_906_dilations_0, groups = var_906_groups_0, pad = var_906_pad_0, pad_type = var_906_pad_type_0, strides = var_906_strides_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = var_890_cast_fp16)[name = string("op_906")]; |
| tensor<int32, [4]> var_911 = const()[name = string("op_911"), val = tensor<int32, [4]>([1, 8, 256, 1])]; |
| tensor<fp16, [1, 8, 256, 1]> var_912 = reshape(shape = var_911, x = var_906)[name = string("op_912")]; |
| tensor<int32, [4]> var_917 = const()[name = string("op_917"), val = tensor<int32, [4]>([0, 1, 3, 2])]; |
| tensor<int32, [3]> var_927 = const()[name = string("op_927"), val = tensor<int32, [3]>([1, 8, 256])]; |
| tensor<fp16, [1, 8, 1, 256]> var_918 = transpose(perm = var_917, x = var_912)[name = string("transpose_96")]; |
| tensor<fp16, [1, 8, 256]> x_17 = reshape(shape = var_927, x = var_918)[name = string("x_17")]; |
| int32 var_933 = const()[name = string("op_933"), val = int32(-1)]; |
| fp16 const_9_promoted = const()[name = string("const_9_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 256]> var_935 = mul(x = x_17, y = const_9_promoted)[name = string("op_935")]; |
| bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 512]> input_29 = concat(axis = var_933, interleave = input_29_interleave_0, values = (x_17, var_935))[name = string("input_29")]; |
| tensor<int32, [1]> normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_930_to_fp16 = const()[name = string("op_930_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 8, 512]> normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_930_to_fp16, x = input_29)[name = string("normed_29_cast_fp16")]; |
| tensor<int32, [2]> var_940_split_sizes_0 = const()[name = string("op_940_split_sizes_0"), val = tensor<int32, [2]>([256, 256])]; |
| int32 var_940_axis_0 = const()[name = string("op_940_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 256]> var_940_0, tensor<fp16, [1, 8, 256]> var_940_1 = split(axis = var_940_axis_0, split_sizes = var_940_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_940")]; |
| tensor<fp16, [1, 8, 256]> var_942 = mul(x = var_940_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_942")]; |
| tensor<int32, [4]> var_947 = const()[name = string("op_947"), val = tensor<int32, [4]>([1, 8, 1, 256])]; |
| tensor<fp16, [1, 8, 1, 256]> q_9 = reshape(shape = var_947, x = var_942)[name = string("q_9")]; |
| tensor<fp16, [1, 8, 1, 256]> var_949_cast_fp16 = mul(x = q_9, y = cos_s)[name = string("op_949_cast_fp16")]; |
| tensor<int32, [2]> var_950_split_sizes_0 = const()[name = string("op_950_split_sizes_0"), val = tensor<int32, [2]>([128, 128])]; |
| int32 var_950_axis_0 = const()[name = string("op_950_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 1, 128]> var_950_0, tensor<fp16, [1, 8, 1, 128]> var_950_1 = split(axis = var_950_axis_0, split_sizes = var_950_split_sizes_0, x = q_9)[name = string("op_950")]; |
| fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 1, 128]> var_952 = mul(x = var_950_1, y = const_10_promoted)[name = string("op_952")]; |
| int32 var_954 = const()[name = string("op_954"), val = int32(-1)]; |
| bool var_955_interleave_0 = const()[name = string("op_955_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 256]> var_955 = concat(axis = var_954, interleave = var_955_interleave_0, values = (var_952, var_950_0))[name = string("op_955")]; |
| tensor<fp16, [1, 8, 1, 256]> var_956_cast_fp16 = mul(x = var_955, y = sin_s)[name = string("op_956_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 256]> q_11_cast_fp16 = add(x = var_949_cast_fp16, y = var_956_cast_fp16)[name = string("q_11_cast_fp16")]; |
| bool attn_weights_5_transpose_x_0 = const()[name = string("attn_weights_5_transpose_x_0"), val = bool(false)]; |
| bool attn_weights_5_transpose_y_0 = const()[name = string("attn_weights_5_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 512]> attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = q_11_cast_fp16, y = transpose_40_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 512]> x_19_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask_sliding)[name = string("x_19_cast_fp16")]; |
| tensor<int32, [1]> reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> reduce_max_1 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_19_cast_fp16)[name = string("reduce_max_1")]; |
| tensor<fp16, [1, 8, 1, 512]> var_988 = sub(x = x_19_cast_fp16, y = reduce_max_1)[name = string("op_988")]; |
| tensor<fp16, [1, 8, 1, 512]> var_994 = exp(x = var_988)[name = string("op_994")]; |
| tensor<int32, [1]> var_1004_axes_0 = const()[name = string("op_1004_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool var_1004_keep_dims_0 = const()[name = string("op_1004_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> var_1004 = reduce_sum(axes = var_1004_axes_0, keep_dims = var_1004_keep_dims_0, x = var_994)[name = string("op_1004")]; |
| tensor<fp16, [1, 8, 1, 512]> var_1010_cast_fp16 = real_div(x = var_994, y = var_1004)[name = string("op_1010_cast_fp16")]; |
| bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; |
| bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 256]> attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = var_1010_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_7_cast_fp16")]; |
| tensor<int32, [4]> var_1021 = const()[name = string("op_1021"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_1028 = const()[name = string("op_1028"), val = tensor<int32, [3]>([1, 1, -1])]; |
| tensor<fp16, [1, 1, 8, 256]> var_1022_cast_fp16 = transpose(perm = var_1021, x = attn_output_7_cast_fp16)[name = string("transpose_95")]; |
| tensor<fp16, [1, 1, 2048]> attn_output_9_cast_fp16 = reshape(shape = var_1028, x = var_1022_cast_fp16)[name = string("attn_output_9_cast_fp16")]; |
| tensor<int32, [3]> var_1033 = const()[name = string("op_1033"), val = tensor<int32, [3]>([0, 2, 1])]; |
| string var_1049_pad_type_0 = const()[name = string("op_1049_pad_type_0"), val = string("valid")]; |
| int32 var_1049_groups_0 = const()[name = string("op_1049_groups_0"), val = int32(1)]; |
| tensor<int32, [1]> var_1049_strides_0 = const()[name = string("op_1049_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> var_1049_pad_0 = const()[name = string("op_1049_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> var_1049_dilations_0 = const()[name = string("op_1049_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<fp16, [1536, 2048, 1]> squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306072192))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307645120))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; |
| tensor<fp16, [1, 2048, 1]> var_1034_cast_fp16 = transpose(perm = var_1033, x = attn_output_9_cast_fp16)[name = string("transpose_94")]; |
| tensor<fp16, [1, 1536, 1]> var_1049_cast_fp16 = conv(dilations = var_1049_dilations_0, groups = var_1049_groups_0, pad = var_1049_pad_0, pad_type = var_1049_pad_type_0, strides = var_1049_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_1034_cast_fp16)[name = string("op_1049_cast_fp16")]; |
| tensor<int32, [3]> var_1053 = const()[name = string("op_1053"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_1059 = const()[name = string("op_1059"), val = int32(-1)]; |
| fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_23_cast_fp16 = transpose(perm = var_1053, x = var_1049_cast_fp16)[name = string("transpose_93")]; |
| tensor<fp16, [1, 1, 1536]> var_1061_cast_fp16 = mul(x = x_23_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_1061_cast_fp16")]; |
| bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_33_cast_fp16 = concat(axis = var_1059, interleave = input_33_interleave_0, values = (x_23_cast_fp16, var_1061_cast_fp16))[name = string("input_33_cast_fp16")]; |
| tensor<int32, [1]> normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1056_to_fp16 = const()[name = string("op_1056_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_1056_to_fp16, x = input_33_cast_fp16)[name = string("normed_33_cast_fp16")]; |
| tensor<int32, [2]> var_1066_split_sizes_0 = const()[name = string("op_1066_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_1066_axis_0 = const()[name = string("op_1066_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_1066_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1066_cast_fp16_1 = split(axis = var_1066_axis_0, split_sizes = var_1066_split_sizes_0, x = normed_33_cast_fp16)[name = string("op_1066_cast_fp16")]; |
| tensor<fp16, [1536]> layers_1_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307646720)))]; |
| tensor<fp16, [1, 1, 1536]> attn_output_11_cast_fp16 = mul(x = var_1066_cast_fp16_0, y = layers_1_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_11_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> x_25_cast_fp16 = add(x = x_15_cast_fp16, y = attn_output_11_cast_fp16)[name = string("x_25_cast_fp16")]; |
| int32 var_1075 = const()[name = string("op_1075"), val = int32(-1)]; |
| fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_1077_cast_fp16 = mul(x = x_25_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_1077_cast_fp16")]; |
| bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_35_cast_fp16 = concat(axis = var_1075, interleave = input_35_interleave_0, values = (x_25_cast_fp16, var_1077_cast_fp16))[name = string("input_35_cast_fp16")]; |
| tensor<int32, [1]> normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1072_to_fp16 = const()[name = string("op_1072_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_1072_to_fp16, x = input_35_cast_fp16)[name = string("normed_37_cast_fp16")]; |
| tensor<int32, [2]> var_1082_split_sizes_0 = const()[name = string("op_1082_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_1082_axis_0 = const()[name = string("op_1082_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_1082_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1082_cast_fp16_1 = split(axis = var_1082_axis_0, split_sizes = var_1082_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_1082_cast_fp16")]; |
| tensor<fp16, [1536]> layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307649856)))]; |
| tensor<fp16, [1, 1, 1536]> h_9_cast_fp16 = mul(x = var_1082_cast_fp16_0, y = layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_9_cast_fp16")]; |
| tensor<int32, [3]> var_1093 = const()[name = string("op_1093"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_1094 = transpose(perm = var_1093, x = h_9_cast_fp16)[name = string("transpose_92")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_37 = expand_dims(axes = input_37_axes_0, x = var_1094)[name = string("input_37")]; |
| string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gate_5_pad_0 = const()[name = string("gate_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gate_5_dilations_0 = const()[name = string("gate_5_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_5 = conv(dilations = gate_5_dilations_0, groups = gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_37)[name = string("gate_5")]; |
| string up_3_pad_type_0 = const()[name = string("up_3_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_3")]; |
| string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")]; |
| tensor<fp16, [1, 12288, 1, 1]> input_39 = mul(x = gate_7, y = up_3)[name = string("input_39")]; |
| string mlp_out_3_pad_type_0 = const()[name = string("mlp_out_3_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> mlp_out_3_strides_0 = const()[name = string("mlp_out_3_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> mlp_out_3_pad_0 = const()[name = string("mlp_out_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> mlp_out_3_dilations_0 = const()[name = string("mlp_out_3_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 mlp_out_3_groups_0 = const()[name = string("mlp_out_3_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 1536, 1, 1]> mlp_out_3 = conv(dilations = mlp_out_3_dilations_0, groups = mlp_out_3_groups_0, pad = mlp_out_3_pad_0, pad_type = mlp_out_3_pad_type_0, strides = mlp_out_3_strides_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_39)[name = string("mlp_out_3")]; |
| tensor<int32, [1]> var_1134_axes_0 = const()[name = string("op_1134_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_1134 = squeeze(axes = var_1134_axes_0, x = mlp_out_3)[name = string("op_1134")]; |
| tensor<int32, [3]> var_1138 = const()[name = string("op_1138"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_1144 = const()[name = string("op_1144"), val = int32(-1)]; |
| fp16 const_13_promoted = const()[name = string("const_13_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_27 = transpose(perm = var_1138, x = var_1134)[name = string("transpose_91")]; |
| tensor<fp16, [1, 1, 1536]> var_1146 = mul(x = x_27, y = const_13_promoted)[name = string("op_1146")]; |
| bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_41 = concat(axis = var_1144, interleave = input_41_interleave_0, values = (x_27, var_1146))[name = string("input_41")]; |
| tensor<int32, [1]> normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1141_to_fp16 = const()[name = string("op_1141_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_1141_to_fp16, x = input_41)[name = string("normed_41_cast_fp16")]; |
| tensor<int32, [2]> var_1151_split_sizes_0 = const()[name = string("op_1151_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_1151_axis_0 = const()[name = string("op_1151_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_1151_0, tensor<fp16, [1, 1, 1536]> var_1151_1 = split(axis = var_1151_axis_0, split_sizes = var_1151_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_1151")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_13 = mul(x = var_1151_0, y = layers_1_post_feedforward_layernorm_weight)[name = string("hidden_states_13")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_15_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_13)[name = string("hidden_states_15_cast_fp16")]; |
| tensor<int32, [3]> per_layer_slice_3_begin_0 = const()[name = string("per_layer_slice_3_begin_0"), val = tensor<int32, [3]>([0, 0, 6656])]; |
| tensor<int32, [3]> per_layer_slice_3_end_0 = const()[name = string("per_layer_slice_3_end_0"), val = tensor<int32, [3]>([1, 1, 6912])]; |
| tensor<bool, [3]> per_layer_slice_3_end_mask_0 = const()[name = string("per_layer_slice_3_end_mask_0"), val = tensor<bool, [3]>([true, true, false])]; |
| tensor<fp16, [1, 1, 256]> per_layer_slice_3_cast_fp16 = slice_by_index(begin = per_layer_slice_3_begin_0, end = per_layer_slice_3_end_0, end_mask = per_layer_slice_3_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_3_cast_fp16")]; |
| tensor<int32, [3]> var_1179 = const()[name = string("op_1179"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_43_axes_0 = const()[name = string("input_43_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_1180 = transpose(perm = var_1179, x = hidden_states_15_cast_fp16)[name = string("transpose_90")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_43 = expand_dims(axes = input_43_axes_0, x = var_1180)[name = string("input_43")]; |
| string gated_7_pad_type_0 = const()[name = string("gated_7_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_7_strides_0 = const()[name = string("gated_7_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_7_pad_0 = const()[name = string("gated_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_7_dilations_0 = const()[name = string("gated_7_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_7_groups_0 = const()[name = string("gated_7_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 256, 1, 1]> gated_7 = conv(dilations = gated_7_dilations_0, groups = gated_7_groups_0, pad = gated_7_pad_0, pad_type = gated_7_pad_type_0, strides = gated_7_strides_0, weight = layers_1_per_layer_input_gate_weight_palettized, x = input_43)[name = string("gated_7")]; |
| string gated_9_mode_0 = const()[name = string("gated_9_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 256, 1, 1]> gated_9 = gelu(mode = gated_9_mode_0, x = gated_7)[name = string("gated_9")]; |
| tensor<int32, [3]> var_1199 = const()[name = string("op_1199"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> per_layer_slice_conv_3_axes_0 = const()[name = string("per_layer_slice_conv_3_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 256, 1]> var_1200_cast_fp16 = transpose(perm = var_1199, x = per_layer_slice_3_cast_fp16)[name = string("transpose_89")]; |
| tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_3_cast_fp16 = expand_dims(axes = per_layer_slice_conv_3_axes_0, x = var_1200_cast_fp16)[name = string("per_layer_slice_conv_3_cast_fp16")]; |
| tensor<fp16, [1, 256, 1, 1]> input_45_cast_fp16 = mul(x = gated_9, y = per_layer_slice_conv_3_cast_fp16)[name = string("input_45_cast_fp16")]; |
| string gated_11_pad_type_0 = const()[name = string("gated_11_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_11_strides_0 = const()[name = string("gated_11_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_11_pad_0 = const()[name = string("gated_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_11_dilations_0 = const()[name = string("gated_11_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_11_groups_0 = const()[name = string("gated_11_groups_0"), val = int32(1)]; |
| tensor<fp16, [1536, 256, 1, 1]> layers_1_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307652992))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307849664))))[name = string("layers_1_per_layer_projection_weight_promoted_to_fp16_palettized")]; |
| tensor<fp16, [1, 1536, 1, 1]> gated_11_cast_fp16 = conv(dilations = gated_11_dilations_0, groups = gated_11_groups_0, pad = gated_11_pad_0, pad_type = gated_11_pad_type_0, strides = gated_11_strides_0, weight = layers_1_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_45_cast_fp16)[name = string("gated_11_cast_fp16")]; |
| tensor<int32, [1]> var_1216_axes_0 = const()[name = string("op_1216_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_1216_cast_fp16 = squeeze(axes = var_1216_axes_0, x = gated_11_cast_fp16)[name = string("op_1216_cast_fp16")]; |
| tensor<int32, [3]> var_1220 = const()[name = string("op_1220"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_1226 = const()[name = string("op_1226"), val = int32(-1)]; |
| fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_29_cast_fp16 = transpose(perm = var_1220, x = var_1216_cast_fp16)[name = string("transpose_88")]; |
| tensor<fp16, [1, 1, 1536]> var_1228_cast_fp16 = mul(x = x_29_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1228_cast_fp16")]; |
| bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_47_cast_fp16 = concat(axis = var_1226, interleave = input_47_interleave_0, values = (x_29_cast_fp16, var_1228_cast_fp16))[name = string("input_47_cast_fp16")]; |
| tensor<int32, [1]> normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1223_to_fp16 = const()[name = string("op_1223_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_1223_to_fp16, x = input_47_cast_fp16)[name = string("normed_45_cast_fp16")]; |
| tensor<int32, [2]> var_1233_split_sizes_0 = const()[name = string("op_1233_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_1233_axis_0 = const()[name = string("op_1233_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_1233_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1233_cast_fp16_1 = split(axis = var_1233_axis_0, split_sizes = var_1233_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_1233_cast_fp16")]; |
| tensor<fp16, [1536]> layers_1_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307851264)))]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_19_cast_fp16 = mul(x = var_1233_cast_fp16_0, y = layers_1_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_19_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_21_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; |
| tensor<fp16, [1]> const_15_promoted_to_fp16 = const()[name = string("const_15_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.a6p-1])]; |
| tensor<fp16, [1, 1, 1536]> x_31_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_15_promoted_to_fp16)[name = string("x_31_cast_fp16")]; |
| int32 var_1248 = const()[name = string("op_1248"), val = int32(-1)]; |
| fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_1250_cast_fp16 = mul(x = x_31_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1250_cast_fp16")]; |
| bool input_49_interleave_0 = const()[name = string("input_49_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_49_cast_fp16 = concat(axis = var_1248, interleave = input_49_interleave_0, values = (x_31_cast_fp16, var_1250_cast_fp16))[name = string("input_49_cast_fp16")]; |
| tensor<int32, [1]> normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1245_to_fp16 = const()[name = string("op_1245_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_1245_to_fp16, x = input_49_cast_fp16)[name = string("normed_49_cast_fp16")]; |
| tensor<int32, [2]> var_1255_split_sizes_0 = const()[name = string("op_1255_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_1255_axis_0 = const()[name = string("op_1255_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_1255_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1255_cast_fp16_1 = split(axis = var_1255_axis_0, split_sizes = var_1255_split_sizes_0, x = normed_49_cast_fp16)[name = string("op_1255_cast_fp16")]; |
| tensor<fp16, [1536]> layers_2_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307854400)))]; |
| tensor<fp16, [1, 1, 1536]> h_13_cast_fp16 = mul(x = var_1255_cast_fp16_0, y = layers_2_input_layernorm_weight_promoted_to_fp16)[name = string("h_13_cast_fp16")]; |
| tensor<int32, [3]> var_1261 = const()[name = string("op_1261"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> var_1264_axes_0 = const()[name = string("op_1264_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_1262_cast_fp16 = transpose(perm = var_1261, x = h_13_cast_fp16)[name = string("transpose_87")]; |
| tensor<fp16, [1, 1536, 1, 1]> var_1264_cast_fp16 = expand_dims(axes = var_1264_axes_0, x = var_1262_cast_fp16)[name = string("op_1264_cast_fp16")]; |
| string var_1280_pad_type_0 = const()[name = string("op_1280_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> var_1280_strides_0 = const()[name = string("op_1280_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> var_1280_pad_0 = const()[name = string("op_1280_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> var_1280_dilations_0 = const()[name = string("op_1280_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 var_1280_groups_0 = const()[name = string("op_1280_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 2048, 1, 1]> var_1280 = conv(dilations = var_1280_dilations_0, groups = var_1280_groups_0, pad = var_1280_pad_0, pad_type = var_1280_pad_type_0, strides = var_1280_strides_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = var_1264_cast_fp16)[name = string("op_1280")]; |
| tensor<int32, [4]> var_1285 = const()[name = string("op_1285"), val = tensor<int32, [4]>([1, 8, 256, 1])]; |
| tensor<fp16, [1, 8, 256, 1]> var_1286 = reshape(shape = var_1285, x = var_1280)[name = string("op_1286")]; |
| tensor<int32, [4]> var_1291 = const()[name = string("op_1291"), val = tensor<int32, [4]>([0, 1, 3, 2])]; |
| tensor<int32, [3]> var_1301 = const()[name = string("op_1301"), val = tensor<int32, [3]>([1, 8, 256])]; |
| tensor<fp16, [1, 8, 1, 256]> var_1292 = transpose(perm = var_1291, x = var_1286)[name = string("transpose_86")]; |
| tensor<fp16, [1, 8, 256]> x_33 = reshape(shape = var_1301, x = var_1292)[name = string("x_33")]; |
| int32 var_1307 = const()[name = string("op_1307"), val = int32(-1)]; |
| fp16 const_17_promoted = const()[name = string("const_17_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 256]> var_1309 = mul(x = x_33, y = const_17_promoted)[name = string("op_1309")]; |
| bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 512]> input_53 = concat(axis = var_1307, interleave = input_53_interleave_0, values = (x_33, var_1309))[name = string("input_53")]; |
| tensor<int32, [1]> normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1304_to_fp16 = const()[name = string("op_1304_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 8, 512]> normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_1304_to_fp16, x = input_53)[name = string("normed_53_cast_fp16")]; |
| tensor<int32, [2]> var_1314_split_sizes_0 = const()[name = string("op_1314_split_sizes_0"), val = tensor<int32, [2]>([256, 256])]; |
| int32 var_1314_axis_0 = const()[name = string("op_1314_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 256]> var_1314_0, tensor<fp16, [1, 8, 256]> var_1314_1 = split(axis = var_1314_axis_0, split_sizes = var_1314_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_1314")]; |
| tensor<fp16, [1, 8, 256]> var_1316 = mul(x = var_1314_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_1316")]; |
| tensor<int32, [4]> var_1321 = const()[name = string("op_1321"), val = tensor<int32, [4]>([1, 8, 1, 256])]; |
| tensor<fp16, [1, 8, 1, 256]> q_15 = reshape(shape = var_1321, x = var_1316)[name = string("q_15")]; |
| tensor<fp16, [1, 8, 1, 256]> var_1323_cast_fp16 = mul(x = q_15, y = cos_s)[name = string("op_1323_cast_fp16")]; |
| tensor<int32, [2]> var_1324_split_sizes_0 = const()[name = string("op_1324_split_sizes_0"), val = tensor<int32, [2]>([128, 128])]; |
| int32 var_1324_axis_0 = const()[name = string("op_1324_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 1, 128]> var_1324_0, tensor<fp16, [1, 8, 1, 128]> var_1324_1 = split(axis = var_1324_axis_0, split_sizes = var_1324_split_sizes_0, x = q_15)[name = string("op_1324")]; |
| fp16 const_18_promoted = const()[name = string("const_18_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 1, 128]> var_1326 = mul(x = var_1324_1, y = const_18_promoted)[name = string("op_1326")]; |
| int32 var_1328 = const()[name = string("op_1328"), val = int32(-1)]; |
| bool var_1329_interleave_0 = const()[name = string("op_1329_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 256]> var_1329 = concat(axis = var_1328, interleave = var_1329_interleave_0, values = (var_1326, var_1324_0))[name = string("op_1329")]; |
| tensor<fp16, [1, 8, 1, 256]> var_1330_cast_fp16 = mul(x = var_1329, y = sin_s)[name = string("op_1330_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 256]> q_17_cast_fp16 = add(x = var_1323_cast_fp16, y = var_1330_cast_fp16)[name = string("q_17_cast_fp16")]; |
| bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; |
| bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 512]> attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = q_17_cast_fp16, y = transpose_40_cast_fp16)[name = string("attn_weights_9_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 512]> x_35_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask_sliding)[name = string("x_35_cast_fp16")]; |
| tensor<int32, [1]> reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> reduce_max_2 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_35_cast_fp16)[name = string("reduce_max_2")]; |
| tensor<fp16, [1, 8, 1, 512]> var_1362 = sub(x = x_35_cast_fp16, y = reduce_max_2)[name = string("op_1362")]; |
| tensor<fp16, [1, 8, 1, 512]> var_1368 = exp(x = var_1362)[name = string("op_1368")]; |
| tensor<int32, [1]> var_1378_axes_0 = const()[name = string("op_1378_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool var_1378_keep_dims_0 = const()[name = string("op_1378_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> var_1378 = reduce_sum(axes = var_1378_axes_0, keep_dims = var_1378_keep_dims_0, x = var_1368)[name = string("op_1378")]; |
| tensor<fp16, [1, 8, 1, 512]> var_1384_cast_fp16 = real_div(x = var_1368, y = var_1378)[name = string("op_1384_cast_fp16")]; |
| bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; |
| bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 256]> attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = var_1384_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_13_cast_fp16")]; |
| tensor<int32, [4]> var_1395 = const()[name = string("op_1395"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_1402 = const()[name = string("op_1402"), val = tensor<int32, [3]>([1, 1, -1])]; |
| tensor<fp16, [1, 1, 8, 256]> var_1396_cast_fp16 = transpose(perm = var_1395, x = attn_output_13_cast_fp16)[name = string("transpose_85")]; |
| tensor<fp16, [1, 1, 2048]> attn_output_15_cast_fp16 = reshape(shape = var_1402, x = var_1396_cast_fp16)[name = string("attn_output_15_cast_fp16")]; |
| tensor<int32, [3]> var_1407 = const()[name = string("op_1407"), val = tensor<int32, [3]>([0, 2, 1])]; |
| string var_1423_pad_type_0 = const()[name = string("op_1423_pad_type_0"), val = string("valid")]; |
| int32 var_1423_groups_0 = const()[name = string("op_1423_groups_0"), val = int32(1)]; |
| tensor<int32, [1]> var_1423_strides_0 = const()[name = string("op_1423_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> var_1423_pad_0 = const()[name = string("op_1423_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> var_1423_dilations_0 = const()[name = string("op_1423_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<fp16, [1536, 2048, 1]> squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307857536))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309430464))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; |
| tensor<fp16, [1, 2048, 1]> var_1408_cast_fp16 = transpose(perm = var_1407, x = attn_output_15_cast_fp16)[name = string("transpose_84")]; |
| tensor<fp16, [1, 1536, 1]> var_1423_cast_fp16 = conv(dilations = var_1423_dilations_0, groups = var_1423_groups_0, pad = var_1423_pad_0, pad_type = var_1423_pad_type_0, strides = var_1423_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_1408_cast_fp16)[name = string("op_1423_cast_fp16")]; |
| tensor<int32, [3]> var_1427 = const()[name = string("op_1427"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_1433 = const()[name = string("op_1433"), val = int32(-1)]; |
| fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_39_cast_fp16 = transpose(perm = var_1427, x = var_1423_cast_fp16)[name = string("transpose_83")]; |
| tensor<fp16, [1, 1, 1536]> var_1435_cast_fp16 = mul(x = x_39_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1435_cast_fp16")]; |
| bool input_57_interleave_0 = const()[name = string("input_57_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_57_cast_fp16 = concat(axis = var_1433, interleave = input_57_interleave_0, values = (x_39_cast_fp16, var_1435_cast_fp16))[name = string("input_57_cast_fp16")]; |
| tensor<int32, [1]> normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1430_to_fp16 = const()[name = string("op_1430_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_1430_to_fp16, x = input_57_cast_fp16)[name = string("normed_57_cast_fp16")]; |
| tensor<int32, [2]> var_1440_split_sizes_0 = const()[name = string("op_1440_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_1440_axis_0 = const()[name = string("op_1440_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_1440_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1440_cast_fp16_1 = split(axis = var_1440_axis_0, split_sizes = var_1440_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_1440_cast_fp16")]; |
| tensor<fp16, [1536]> layers_2_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309432064)))]; |
| tensor<fp16, [1, 1, 1536]> attn_output_17_cast_fp16 = mul(x = var_1440_cast_fp16_0, y = layers_2_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_17_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> x_41_cast_fp16 = add(x = x_31_cast_fp16, y = attn_output_17_cast_fp16)[name = string("x_41_cast_fp16")]; |
| int32 var_1449 = const()[name = string("op_1449"), val = int32(-1)]; |
| fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_1451_cast_fp16 = mul(x = x_41_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1451_cast_fp16")]; |
| bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_59_cast_fp16 = concat(axis = var_1449, interleave = input_59_interleave_0, values = (x_41_cast_fp16, var_1451_cast_fp16))[name = string("input_59_cast_fp16")]; |
| tensor<int32, [1]> normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1446_to_fp16 = const()[name = string("op_1446_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_1446_to_fp16, x = input_59_cast_fp16)[name = string("normed_61_cast_fp16")]; |
| tensor<int32, [2]> var_1456_split_sizes_0 = const()[name = string("op_1456_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_1456_axis_0 = const()[name = string("op_1456_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_1456_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1456_cast_fp16_1 = split(axis = var_1456_axis_0, split_sizes = var_1456_split_sizes_0, x = normed_61_cast_fp16)[name = string("op_1456_cast_fp16")]; |
| tensor<fp16, [1536]> layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309435200)))]; |
| tensor<fp16, [1, 1, 1536]> h_15_cast_fp16 = mul(x = var_1456_cast_fp16_0, y = layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_15_cast_fp16")]; |
| tensor<int32, [3]> var_1467 = const()[name = string("op_1467"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_61_axes_0 = const()[name = string("input_61_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_1468 = transpose(perm = var_1467, x = h_15_cast_fp16)[name = string("transpose_82")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_61 = expand_dims(axes = input_61_axes_0, x = var_1468)[name = string("input_61")]; |
| string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gate_9_dilations_0 = const()[name = string("gate_9_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gate_9_groups_0 = const()[name = string("gate_9_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_61)[name = string("gate_9")]; |
| string up_5_pad_type_0 = const()[name = string("up_5_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_61)[name = string("up_5")]; |
| string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")]; |
| tensor<fp16, [1, 12288, 1, 1]> input_63 = mul(x = gate_11, y = up_5)[name = string("input_63")]; |
| string mlp_out_5_pad_type_0 = const()[name = string("mlp_out_5_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> mlp_out_5_strides_0 = const()[name = string("mlp_out_5_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> mlp_out_5_pad_0 = const()[name = string("mlp_out_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> mlp_out_5_dilations_0 = const()[name = string("mlp_out_5_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 mlp_out_5_groups_0 = const()[name = string("mlp_out_5_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 1536, 1, 1]> mlp_out_5 = conv(dilations = mlp_out_5_dilations_0, groups = mlp_out_5_groups_0, pad = mlp_out_5_pad_0, pad_type = mlp_out_5_pad_type_0, strides = mlp_out_5_strides_0, weight = layers_2_mlp_down_proj_weight_palettized, x = input_63)[name = string("mlp_out_5")]; |
| tensor<int32, [1]> var_1508_axes_0 = const()[name = string("op_1508_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_1508 = squeeze(axes = var_1508_axes_0, x = mlp_out_5)[name = string("op_1508")]; |
| tensor<int32, [3]> var_1512 = const()[name = string("op_1512"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_1518 = const()[name = string("op_1518"), val = int32(-1)]; |
| fp16 const_21_promoted = const()[name = string("const_21_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_43 = transpose(perm = var_1512, x = var_1508)[name = string("transpose_81")]; |
| tensor<fp16, [1, 1, 1536]> var_1520 = mul(x = x_43, y = const_21_promoted)[name = string("op_1520")]; |
| bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_65 = concat(axis = var_1518, interleave = input_65_interleave_0, values = (x_43, var_1520))[name = string("input_65")]; |
| tensor<int32, [1]> normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1515_to_fp16 = const()[name = string("op_1515_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_1515_to_fp16, x = input_65)[name = string("normed_65_cast_fp16")]; |
| tensor<int32, [2]> var_1525_split_sizes_0 = const()[name = string("op_1525_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_1525_axis_0 = const()[name = string("op_1525_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_1525_0, tensor<fp16, [1, 1, 1536]> var_1525_1 = split(axis = var_1525_axis_0, split_sizes = var_1525_split_sizes_0, x = normed_65_cast_fp16)[name = string("op_1525")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_23 = mul(x = var_1525_0, y = layers_2_post_feedforward_layernorm_weight)[name = string("hidden_states_23")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_25_cast_fp16 = add(x = x_41_cast_fp16, y = hidden_states_23)[name = string("hidden_states_25_cast_fp16")]; |
| tensor<int32, [3]> per_layer_slice_5_begin_0 = const()[name = string("per_layer_slice_5_begin_0"), val = tensor<int32, [3]>([0, 0, 6912])]; |
| tensor<int32, [3]> per_layer_slice_5_end_0 = const()[name = string("per_layer_slice_5_end_0"), val = tensor<int32, [3]>([1, 1, 7168])]; |
| tensor<bool, [3]> per_layer_slice_5_end_mask_0 = const()[name = string("per_layer_slice_5_end_mask_0"), val = tensor<bool, [3]>([true, true, false])]; |
| tensor<fp16, [1, 1, 256]> per_layer_slice_5_cast_fp16 = slice_by_index(begin = per_layer_slice_5_begin_0, end = per_layer_slice_5_end_0, end_mask = per_layer_slice_5_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_5_cast_fp16")]; |
| tensor<int32, [3]> var_1553 = const()[name = string("op_1553"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_1554 = transpose(perm = var_1553, x = hidden_states_25_cast_fp16)[name = string("transpose_80")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_67 = expand_dims(axes = input_67_axes_0, x = var_1554)[name = string("input_67")]; |
| string gated_13_pad_type_0 = const()[name = string("gated_13_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_13_strides_0 = const()[name = string("gated_13_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_13_pad_0 = const()[name = string("gated_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_13_dilations_0 = const()[name = string("gated_13_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_13_groups_0 = const()[name = string("gated_13_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 256, 1, 1]> gated_13 = conv(dilations = gated_13_dilations_0, groups = gated_13_groups_0, pad = gated_13_pad_0, pad_type = gated_13_pad_type_0, strides = gated_13_strides_0, weight = layers_2_per_layer_input_gate_weight_palettized, x = input_67)[name = string("gated_13")]; |
| string gated_15_mode_0 = const()[name = string("gated_15_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 256, 1, 1]> gated_15 = gelu(mode = gated_15_mode_0, x = gated_13)[name = string("gated_15")]; |
| tensor<int32, [3]> var_1573 = const()[name = string("op_1573"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> per_layer_slice_conv_5_axes_0 = const()[name = string("per_layer_slice_conv_5_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 256, 1]> var_1574_cast_fp16 = transpose(perm = var_1573, x = per_layer_slice_5_cast_fp16)[name = string("transpose_79")]; |
| tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_5_cast_fp16 = expand_dims(axes = per_layer_slice_conv_5_axes_0, x = var_1574_cast_fp16)[name = string("per_layer_slice_conv_5_cast_fp16")]; |
| tensor<fp16, [1, 256, 1, 1]> input_69_cast_fp16 = mul(x = gated_15, y = per_layer_slice_conv_5_cast_fp16)[name = string("input_69_cast_fp16")]; |
| string gated_17_pad_type_0 = const()[name = string("gated_17_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_17_strides_0 = const()[name = string("gated_17_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_17_pad_0 = const()[name = string("gated_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_17_dilations_0 = const()[name = string("gated_17_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_17_groups_0 = const()[name = string("gated_17_groups_0"), val = int32(1)]; |
| tensor<fp16, [1536, 256, 1, 1]> layers_2_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309438336))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309635008))))[name = string("layers_2_per_layer_projection_weight_promoted_to_fp16_palettized")]; |
| tensor<fp16, [1, 1536, 1, 1]> gated_17_cast_fp16 = conv(dilations = gated_17_dilations_0, groups = gated_17_groups_0, pad = gated_17_pad_0, pad_type = gated_17_pad_type_0, strides = gated_17_strides_0, weight = layers_2_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_69_cast_fp16)[name = string("gated_17_cast_fp16")]; |
| tensor<int32, [1]> var_1590_axes_0 = const()[name = string("op_1590_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_1590_cast_fp16 = squeeze(axes = var_1590_axes_0, x = gated_17_cast_fp16)[name = string("op_1590_cast_fp16")]; |
| tensor<int32, [3]> var_1594 = const()[name = string("op_1594"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_1600 = const()[name = string("op_1600"), val = int32(-1)]; |
| fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_45_cast_fp16 = transpose(perm = var_1594, x = var_1590_cast_fp16)[name = string("transpose_78")]; |
| tensor<fp16, [1, 1, 1536]> var_1602_cast_fp16 = mul(x = x_45_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_1602_cast_fp16")]; |
| bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_71_cast_fp16 = concat(axis = var_1600, interleave = input_71_interleave_0, values = (x_45_cast_fp16, var_1602_cast_fp16))[name = string("input_71_cast_fp16")]; |
| tensor<int32, [1]> normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1597_to_fp16 = const()[name = string("op_1597_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_1597_to_fp16, x = input_71_cast_fp16)[name = string("normed_69_cast_fp16")]; |
| tensor<int32, [2]> var_1607_split_sizes_0 = const()[name = string("op_1607_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_1607_axis_0 = const()[name = string("op_1607_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_1607_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1607_cast_fp16_1 = split(axis = var_1607_axis_0, split_sizes = var_1607_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_1607_cast_fp16")]; |
| tensor<fp16, [1536]> layers_2_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309636608)))]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_29_cast_fp16 = mul(x = var_1607_cast_fp16_0, y = layers_2_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_29_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_31_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; |
| tensor<fp16, [1]> const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.a4p-1])]; |
| tensor<fp16, [1, 1, 1536]> x_47_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_23_promoted_to_fp16)[name = string("x_47_cast_fp16")]; |
| int32 var_1622 = const()[name = string("op_1622"), val = int32(-1)]; |
| fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_1624_cast_fp16 = mul(x = x_47_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_1624_cast_fp16")]; |
| bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_73_cast_fp16 = concat(axis = var_1622, interleave = input_73_interleave_0, values = (x_47_cast_fp16, var_1624_cast_fp16))[name = string("input_73_cast_fp16")]; |
| tensor<int32, [1]> normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1619_to_fp16 = const()[name = string("op_1619_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_1619_to_fp16, x = input_73_cast_fp16)[name = string("normed_73_cast_fp16")]; |
| tensor<int32, [2]> var_1629_split_sizes_0 = const()[name = string("op_1629_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_1629_axis_0 = const()[name = string("op_1629_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_1629_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1629_cast_fp16_1 = split(axis = var_1629_axis_0, split_sizes = var_1629_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_1629_cast_fp16")]; |
| tensor<fp16, [1536]> layers_3_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309639744)))]; |
| tensor<fp16, [1, 1, 1536]> h_19_cast_fp16 = mul(x = var_1629_cast_fp16_0, y = layers_3_input_layernorm_weight_promoted_to_fp16)[name = string("h_19_cast_fp16")]; |
| tensor<int32, [3]> var_1635 = const()[name = string("op_1635"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> var_1638_axes_0 = const()[name = string("op_1638_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_1636_cast_fp16 = transpose(perm = var_1635, x = h_19_cast_fp16)[name = string("transpose_77")]; |
| tensor<fp16, [1, 1536, 1, 1]> var_1638_cast_fp16 = expand_dims(axes = var_1638_axes_0, x = var_1636_cast_fp16)[name = string("op_1638_cast_fp16")]; |
| string var_1654_pad_type_0 = const()[name = string("op_1654_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> var_1654_strides_0 = const()[name = string("op_1654_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> var_1654_pad_0 = const()[name = string("op_1654_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> var_1654_dilations_0 = const()[name = string("op_1654_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 var_1654_groups_0 = const()[name = string("op_1654_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 2048, 1, 1]> var_1654 = conv(dilations = var_1654_dilations_0, groups = var_1654_groups_0, pad = var_1654_pad_0, pad_type = var_1654_pad_type_0, strides = var_1654_strides_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = var_1638_cast_fp16)[name = string("op_1654")]; |
| tensor<int32, [4]> var_1659 = const()[name = string("op_1659"), val = tensor<int32, [4]>([1, 8, 256, 1])]; |
| tensor<fp16, [1, 8, 256, 1]> var_1660 = reshape(shape = var_1659, x = var_1654)[name = string("op_1660")]; |
| tensor<int32, [4]> var_1665 = const()[name = string("op_1665"), val = tensor<int32, [4]>([0, 1, 3, 2])]; |
| tensor<int32, [3]> var_1675 = const()[name = string("op_1675"), val = tensor<int32, [3]>([1, 8, 256])]; |
| tensor<fp16, [1, 8, 1, 256]> var_1666 = transpose(perm = var_1665, x = var_1660)[name = string("transpose_76")]; |
| tensor<fp16, [1, 8, 256]> x_49 = reshape(shape = var_1675, x = var_1666)[name = string("x_49")]; |
| int32 var_1681 = const()[name = string("op_1681"), val = int32(-1)]; |
| fp16 const_25_promoted = const()[name = string("const_25_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 256]> var_1683 = mul(x = x_49, y = const_25_promoted)[name = string("op_1683")]; |
| bool input_77_interleave_0 = const()[name = string("input_77_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 512]> input_77 = concat(axis = var_1681, interleave = input_77_interleave_0, values = (x_49, var_1683))[name = string("input_77")]; |
| tensor<int32, [1]> normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1678_to_fp16 = const()[name = string("op_1678_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 8, 512]> normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_1678_to_fp16, x = input_77)[name = string("normed_77_cast_fp16")]; |
| tensor<int32, [2]> var_1688_split_sizes_0 = const()[name = string("op_1688_split_sizes_0"), val = tensor<int32, [2]>([256, 256])]; |
| int32 var_1688_axis_0 = const()[name = string("op_1688_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 256]> var_1688_0, tensor<fp16, [1, 8, 256]> var_1688_1 = split(axis = var_1688_axis_0, split_sizes = var_1688_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_1688")]; |
| tensor<fp16, [1, 8, 256]> var_1690 = mul(x = var_1688_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_1690")]; |
| tensor<int32, [4]> var_1695 = const()[name = string("op_1695"), val = tensor<int32, [4]>([1, 8, 1, 256])]; |
| tensor<fp16, [1, 8, 1, 256]> q_21 = reshape(shape = var_1695, x = var_1690)[name = string("q_21")]; |
| tensor<fp16, [1, 8, 1, 256]> var_1697_cast_fp16 = mul(x = q_21, y = cos_s)[name = string("op_1697_cast_fp16")]; |
| tensor<int32, [2]> var_1698_split_sizes_0 = const()[name = string("op_1698_split_sizes_0"), val = tensor<int32, [2]>([128, 128])]; |
| int32 var_1698_axis_0 = const()[name = string("op_1698_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 1, 128]> var_1698_0, tensor<fp16, [1, 8, 1, 128]> var_1698_1 = split(axis = var_1698_axis_0, split_sizes = var_1698_split_sizes_0, x = q_21)[name = string("op_1698")]; |
| fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 1, 128]> var_1700 = mul(x = var_1698_1, y = const_26_promoted)[name = string("op_1700")]; |
| int32 var_1702 = const()[name = string("op_1702"), val = int32(-1)]; |
| bool var_1703_interleave_0 = const()[name = string("op_1703_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 256]> var_1703 = concat(axis = var_1702, interleave = var_1703_interleave_0, values = (var_1700, var_1698_0))[name = string("op_1703")]; |
| tensor<fp16, [1, 8, 1, 256]> var_1704_cast_fp16 = mul(x = var_1703, y = sin_s)[name = string("op_1704_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 256]> q_23_cast_fp16 = add(x = var_1697_cast_fp16, y = var_1704_cast_fp16)[name = string("q_23_cast_fp16")]; |
| bool attn_weights_13_transpose_x_0 = const()[name = string("attn_weights_13_transpose_x_0"), val = bool(false)]; |
| bool attn_weights_13_transpose_y_0 = const()[name = string("attn_weights_13_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 512]> attn_weights_13_cast_fp16 = matmul(transpose_x = attn_weights_13_transpose_x_0, transpose_y = attn_weights_13_transpose_y_0, x = q_23_cast_fp16, y = transpose_40_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 512]> x_51_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask_sliding)[name = string("x_51_cast_fp16")]; |
| tensor<int32, [1]> reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> reduce_max_3 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_51_cast_fp16)[name = string("reduce_max_3")]; |
| tensor<fp16, [1, 8, 1, 512]> var_1736 = sub(x = x_51_cast_fp16, y = reduce_max_3)[name = string("op_1736")]; |
| tensor<fp16, [1, 8, 1, 512]> var_1742 = exp(x = var_1736)[name = string("op_1742")]; |
| tensor<int32, [1]> var_1752_axes_0 = const()[name = string("op_1752_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool var_1752_keep_dims_0 = const()[name = string("op_1752_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> var_1752 = reduce_sum(axes = var_1752_axes_0, keep_dims = var_1752_keep_dims_0, x = var_1742)[name = string("op_1752")]; |
| tensor<fp16, [1, 8, 1, 512]> var_1758_cast_fp16 = real_div(x = var_1742, y = var_1752)[name = string("op_1758_cast_fp16")]; |
| bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; |
| bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 256]> attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = var_1758_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_19_cast_fp16")]; |
| tensor<int32, [4]> var_1769 = const()[name = string("op_1769"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_1776 = const()[name = string("op_1776"), val = tensor<int32, [3]>([1, 1, -1])]; |
| tensor<fp16, [1, 1, 8, 256]> var_1770_cast_fp16 = transpose(perm = var_1769, x = attn_output_19_cast_fp16)[name = string("transpose_75")]; |
| tensor<fp16, [1, 1, 2048]> attn_output_21_cast_fp16 = reshape(shape = var_1776, x = var_1770_cast_fp16)[name = string("attn_output_21_cast_fp16")]; |
| tensor<int32, [3]> var_1781 = const()[name = string("op_1781"), val = tensor<int32, [3]>([0, 2, 1])]; |
| string var_1797_pad_type_0 = const()[name = string("op_1797_pad_type_0"), val = string("valid")]; |
| int32 var_1797_groups_0 = const()[name = string("op_1797_groups_0"), val = int32(1)]; |
| tensor<int32, [1]> var_1797_strides_0 = const()[name = string("op_1797_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> var_1797_pad_0 = const()[name = string("op_1797_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> var_1797_dilations_0 = const()[name = string("op_1797_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<fp16, [1536, 2048, 1]> squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309642880))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311215808))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; |
| tensor<fp16, [1, 2048, 1]> var_1782_cast_fp16 = transpose(perm = var_1781, x = attn_output_21_cast_fp16)[name = string("transpose_74")]; |
| tensor<fp16, [1, 1536, 1]> var_1797_cast_fp16 = conv(dilations = var_1797_dilations_0, groups = var_1797_groups_0, pad = var_1797_pad_0, pad_type = var_1797_pad_type_0, strides = var_1797_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_1782_cast_fp16)[name = string("op_1797_cast_fp16")]; |
| tensor<int32, [3]> var_1801 = const()[name = string("op_1801"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_1807 = const()[name = string("op_1807"), val = int32(-1)]; |
| fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_55_cast_fp16 = transpose(perm = var_1801, x = var_1797_cast_fp16)[name = string("transpose_73")]; |
| tensor<fp16, [1, 1, 1536]> var_1809_cast_fp16 = mul(x = x_55_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_1809_cast_fp16")]; |
| bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_81_cast_fp16 = concat(axis = var_1807, interleave = input_81_interleave_0, values = (x_55_cast_fp16, var_1809_cast_fp16))[name = string("input_81_cast_fp16")]; |
| tensor<int32, [1]> normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1804_to_fp16 = const()[name = string("op_1804_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_1804_to_fp16, x = input_81_cast_fp16)[name = string("normed_81_cast_fp16")]; |
| tensor<int32, [2]> var_1814_split_sizes_0 = const()[name = string("op_1814_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_1814_axis_0 = const()[name = string("op_1814_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_1814_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1814_cast_fp16_1 = split(axis = var_1814_axis_0, split_sizes = var_1814_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_1814_cast_fp16")]; |
| tensor<fp16, [1536]> layers_3_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311217408)))]; |
| tensor<fp16, [1, 1, 1536]> attn_output_23_cast_fp16 = mul(x = var_1814_cast_fp16_0, y = layers_3_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_23_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> x_57_cast_fp16 = add(x = x_47_cast_fp16, y = attn_output_23_cast_fp16)[name = string("x_57_cast_fp16")]; |
| int32 var_1823 = const()[name = string("op_1823"), val = int32(-1)]; |
| fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_1825_cast_fp16 = mul(x = x_57_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_1825_cast_fp16")]; |
| bool input_83_interleave_0 = const()[name = string("input_83_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_83_cast_fp16 = concat(axis = var_1823, interleave = input_83_interleave_0, values = (x_57_cast_fp16, var_1825_cast_fp16))[name = string("input_83_cast_fp16")]; |
| tensor<int32, [1]> normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1820_to_fp16 = const()[name = string("op_1820_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_1820_to_fp16, x = input_83_cast_fp16)[name = string("normed_85_cast_fp16")]; |
| tensor<int32, [2]> var_1830_split_sizes_0 = const()[name = string("op_1830_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_1830_axis_0 = const()[name = string("op_1830_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_1830_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1830_cast_fp16_1 = split(axis = var_1830_axis_0, split_sizes = var_1830_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_1830_cast_fp16")]; |
| tensor<fp16, [1536]> layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311220544)))]; |
| tensor<fp16, [1, 1, 1536]> h_21_cast_fp16 = mul(x = var_1830_cast_fp16_0, y = layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_21_cast_fp16")]; |
| tensor<int32, [3]> var_1841 = const()[name = string("op_1841"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_1842 = transpose(perm = var_1841, x = h_21_cast_fp16)[name = string("transpose_72")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_85 = expand_dims(axes = input_85_axes_0, x = var_1842)[name = string("input_85")]; |
| string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gate_13_pad_0 = const()[name = string("gate_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gate_13_groups_0 = const()[name = string("gate_13_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_85)[name = string("gate_13")]; |
| string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> up_7_pad_0 = const()[name = string("up_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_85)[name = string("up_7")]; |
| string gate_15_mode_0 = const()[name = string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")]; |
| tensor<fp16, [1, 12288, 1, 1]> input_87 = mul(x = gate_15, y = up_7)[name = string("input_87")]; |
| string mlp_out_7_pad_type_0 = const()[name = string("mlp_out_7_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> mlp_out_7_strides_0 = const()[name = string("mlp_out_7_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> mlp_out_7_pad_0 = const()[name = string("mlp_out_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> mlp_out_7_dilations_0 = const()[name = string("mlp_out_7_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 mlp_out_7_groups_0 = const()[name = string("mlp_out_7_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 1536, 1, 1]> mlp_out_7 = conv(dilations = mlp_out_7_dilations_0, groups = mlp_out_7_groups_0, pad = mlp_out_7_pad_0, pad_type = mlp_out_7_pad_type_0, strides = mlp_out_7_strides_0, weight = layers_3_mlp_down_proj_weight_palettized, x = input_87)[name = string("mlp_out_7")]; |
| tensor<int32, [1]> var_1882_axes_0 = const()[name = string("op_1882_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_1882 = squeeze(axes = var_1882_axes_0, x = mlp_out_7)[name = string("op_1882")]; |
| tensor<int32, [3]> var_1886 = const()[name = string("op_1886"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_1892 = const()[name = string("op_1892"), val = int32(-1)]; |
| fp16 const_29_promoted = const()[name = string("const_29_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_59 = transpose(perm = var_1886, x = var_1882)[name = string("transpose_71")]; |
| tensor<fp16, [1, 1, 1536]> var_1894 = mul(x = x_59, y = const_29_promoted)[name = string("op_1894")]; |
| bool input_89_interleave_0 = const()[name = string("input_89_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_89 = concat(axis = var_1892, interleave = input_89_interleave_0, values = (x_59, var_1894))[name = string("input_89")]; |
| tensor<int32, [1]> normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1889_to_fp16 = const()[name = string("op_1889_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_1889_to_fp16, x = input_89)[name = string("normed_89_cast_fp16")]; |
| tensor<int32, [2]> var_1899_split_sizes_0 = const()[name = string("op_1899_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_1899_axis_0 = const()[name = string("op_1899_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_1899_0, tensor<fp16, [1, 1, 1536]> var_1899_1 = split(axis = var_1899_axis_0, split_sizes = var_1899_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_1899")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_33 = mul(x = var_1899_0, y = layers_3_post_feedforward_layernorm_weight)[name = string("hidden_states_33")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_35_cast_fp16 = add(x = x_57_cast_fp16, y = hidden_states_33)[name = string("hidden_states_35_cast_fp16")]; |
| tensor<int32, [3]> per_layer_slice_7_begin_0 = const()[name = string("per_layer_slice_7_begin_0"), val = tensor<int32, [3]>([0, 0, 7168])]; |
| tensor<int32, [3]> per_layer_slice_7_end_0 = const()[name = string("per_layer_slice_7_end_0"), val = tensor<int32, [3]>([1, 1, 7424])]; |
| tensor<bool, [3]> per_layer_slice_7_end_mask_0 = const()[name = string("per_layer_slice_7_end_mask_0"), val = tensor<bool, [3]>([true, true, false])]; |
| tensor<fp16, [1, 1, 256]> per_layer_slice_7_cast_fp16 = slice_by_index(begin = per_layer_slice_7_begin_0, end = per_layer_slice_7_end_0, end_mask = per_layer_slice_7_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_7_cast_fp16")]; |
| tensor<int32, [3]> var_1927 = const()[name = string("op_1927"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_91_axes_0 = const()[name = string("input_91_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_1928 = transpose(perm = var_1927, x = hidden_states_35_cast_fp16)[name = string("transpose_70")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_91 = expand_dims(axes = input_91_axes_0, x = var_1928)[name = string("input_91")]; |
| string gated_19_pad_type_0 = const()[name = string("gated_19_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_19_strides_0 = const()[name = string("gated_19_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_19_pad_0 = const()[name = string("gated_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_19_dilations_0 = const()[name = string("gated_19_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_19_groups_0 = const()[name = string("gated_19_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 256, 1, 1]> gated_19 = conv(dilations = gated_19_dilations_0, groups = gated_19_groups_0, pad = gated_19_pad_0, pad_type = gated_19_pad_type_0, strides = gated_19_strides_0, weight = layers_3_per_layer_input_gate_weight_palettized, x = input_91)[name = string("gated_19")]; |
| string gated_21_mode_0 = const()[name = string("gated_21_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 256, 1, 1]> gated_21 = gelu(mode = gated_21_mode_0, x = gated_19)[name = string("gated_21")]; |
| tensor<int32, [3]> var_1947 = const()[name = string("op_1947"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> per_layer_slice_conv_7_axes_0 = const()[name = string("per_layer_slice_conv_7_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 256, 1]> var_1948_cast_fp16 = transpose(perm = var_1947, x = per_layer_slice_7_cast_fp16)[name = string("transpose_69")]; |
| tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_7_cast_fp16 = expand_dims(axes = per_layer_slice_conv_7_axes_0, x = var_1948_cast_fp16)[name = string("per_layer_slice_conv_7_cast_fp16")]; |
| tensor<fp16, [1, 256, 1, 1]> input_93_cast_fp16 = mul(x = gated_21, y = per_layer_slice_conv_7_cast_fp16)[name = string("input_93_cast_fp16")]; |
| string gated_23_pad_type_0 = const()[name = string("gated_23_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_23_strides_0 = const()[name = string("gated_23_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_23_pad_0 = const()[name = string("gated_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_23_dilations_0 = const()[name = string("gated_23_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_23_groups_0 = const()[name = string("gated_23_groups_0"), val = int32(1)]; |
| tensor<fp16, [1536, 256, 1, 1]> layers_3_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311223680))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311420352))))[name = string("layers_3_per_layer_projection_weight_promoted_to_fp16_palettized")]; |
| tensor<fp16, [1, 1536, 1, 1]> gated_23_cast_fp16 = conv(dilations = gated_23_dilations_0, groups = gated_23_groups_0, pad = gated_23_pad_0, pad_type = gated_23_pad_type_0, strides = gated_23_strides_0, weight = layers_3_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_93_cast_fp16)[name = string("gated_23_cast_fp16")]; |
| tensor<int32, [1]> var_1964_axes_0 = const()[name = string("op_1964_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_1964_cast_fp16 = squeeze(axes = var_1964_axes_0, x = gated_23_cast_fp16)[name = string("op_1964_cast_fp16")]; |
| tensor<int32, [3]> var_1968 = const()[name = string("op_1968"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_1974 = const()[name = string("op_1974"), val = int32(-1)]; |
| fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_61_cast_fp16 = transpose(perm = var_1968, x = var_1964_cast_fp16)[name = string("transpose_68")]; |
| tensor<fp16, [1, 1, 1536]> var_1976_cast_fp16 = mul(x = x_61_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_1976_cast_fp16")]; |
| bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_95_cast_fp16 = concat(axis = var_1974, interleave = input_95_interleave_0, values = (x_61_cast_fp16, var_1976_cast_fp16))[name = string("input_95_cast_fp16")]; |
| tensor<int32, [1]> normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1971_to_fp16 = const()[name = string("op_1971_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_1971_to_fp16, x = input_95_cast_fp16)[name = string("normed_93_cast_fp16")]; |
| tensor<int32, [2]> var_1981_split_sizes_0 = const()[name = string("op_1981_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_1981_axis_0 = const()[name = string("op_1981_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_1981_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_1981_cast_fp16_1 = split(axis = var_1981_axis_0, split_sizes = var_1981_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_1981_cast_fp16")]; |
| tensor<fp16, [1536]> layers_3_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311421952)))]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_39_cast_fp16 = mul(x = var_1981_cast_fp16_0, y = layers_3_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_39_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_41_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; |
| tensor<fp16, [1]> const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.a4p-1])]; |
| tensor<fp16, [1, 1, 1536]> x_63_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_31_promoted_to_fp16)[name = string("x_63_cast_fp16")]; |
| int32 var_1996 = const()[name = string("op_1996"), val = int32(-1)]; |
| fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_1998_cast_fp16 = mul(x = x_63_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_1998_cast_fp16")]; |
| bool input_97_interleave_0 = const()[name = string("input_97_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_97_cast_fp16 = concat(axis = var_1996, interleave = input_97_interleave_0, values = (x_63_cast_fp16, var_1998_cast_fp16))[name = string("input_97_cast_fp16")]; |
| tensor<int32, [1]> normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_1993_to_fp16 = const()[name = string("op_1993_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_1993_to_fp16, x = input_97_cast_fp16)[name = string("normed_97_cast_fp16")]; |
| tensor<int32, [2]> var_2003_split_sizes_0 = const()[name = string("op_2003_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_2003_axis_0 = const()[name = string("op_2003_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_2003_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2003_cast_fp16_1 = split(axis = var_2003_axis_0, split_sizes = var_2003_split_sizes_0, x = normed_97_cast_fp16)[name = string("op_2003_cast_fp16")]; |
| tensor<fp16, [1536]> layers_4_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311425088)))]; |
| tensor<fp16, [1, 1, 1536]> h_25_cast_fp16 = mul(x = var_2003_cast_fp16_0, y = layers_4_input_layernorm_weight_promoted_to_fp16)[name = string("h_25_cast_fp16")]; |
| tensor<int32, [3]> var_2009 = const()[name = string("op_2009"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> var_2012_axes_0 = const()[name = string("op_2012_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_2010_cast_fp16 = transpose(perm = var_2009, x = h_25_cast_fp16)[name = string("transpose_67")]; |
| tensor<fp16, [1, 1536, 1, 1]> var_2012_cast_fp16 = expand_dims(axes = var_2012_axes_0, x = var_2010_cast_fp16)[name = string("op_2012_cast_fp16")]; |
| string var_2028_pad_type_0 = const()[name = string("op_2028_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> var_2028_strides_0 = const()[name = string("op_2028_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> var_2028_pad_0 = const()[name = string("op_2028_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> var_2028_dilations_0 = const()[name = string("op_2028_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 var_2028_groups_0 = const()[name = string("op_2028_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 4096, 1, 1]> var_2028 = conv(dilations = var_2028_dilations_0, groups = var_2028_groups_0, pad = var_2028_pad_0, pad_type = var_2028_pad_type_0, strides = var_2028_strides_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = var_2012_cast_fp16)[name = string("op_2028")]; |
| tensor<int32, [4]> var_2033 = const()[name = string("op_2033"), val = tensor<int32, [4]>([1, 8, 512, 1])]; |
| tensor<fp16, [1, 8, 512, 1]> var_2034 = reshape(shape = var_2033, x = var_2028)[name = string("op_2034")]; |
| tensor<int32, [4]> var_2039 = const()[name = string("op_2039"), val = tensor<int32, [4]>([0, 1, 3, 2])]; |
| tensor<int32, [3]> var_2049 = const()[name = string("op_2049"), val = tensor<int32, [3]>([1, 8, 512])]; |
| tensor<fp16, [1, 8, 1, 512]> var_2040 = transpose(perm = var_2039, x = var_2034)[name = string("transpose_66")]; |
| tensor<fp16, [1, 8, 512]> x_65 = reshape(shape = var_2049, x = var_2040)[name = string("x_65")]; |
| int32 var_2055 = const()[name = string("op_2055"), val = int32(-1)]; |
| fp16 const_33_promoted = const()[name = string("const_33_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 512]> var_2057 = mul(x = x_65, y = const_33_promoted)[name = string("op_2057")]; |
| bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1024]> input_101 = concat(axis = var_2055, interleave = input_101_interleave_0, values = (x_65, var_2057))[name = string("input_101")]; |
| tensor<int32, [1]> normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_2052_to_fp16 = const()[name = string("op_2052_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 8, 1024]> normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_2052_to_fp16, x = input_101)[name = string("normed_101_cast_fp16")]; |
| tensor<int32, [2]> var_2062_split_sizes_0 = const()[name = string("op_2062_split_sizes_0"), val = tensor<int32, [2]>([512, 512])]; |
| int32 var_2062_axis_0 = const()[name = string("op_2062_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 512]> var_2062_0, tensor<fp16, [1, 8, 512]> var_2062_1 = split(axis = var_2062_axis_0, split_sizes = var_2062_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_2062")]; |
| tensor<fp16, [1, 8, 512]> var_2064 = mul(x = var_2062_0, y = layers_4_self_attn_q_norm_weight)[name = string("op_2064")]; |
| tensor<int32, [4]> var_2069 = const()[name = string("op_2069"), val = tensor<int32, [4]>([1, 8, 1, 512])]; |
| tensor<fp16, [1, 8, 1, 512]> q_27 = reshape(shape = var_2069, x = var_2064)[name = string("q_27")]; |
| tensor<fp16, [1, 8, 1, 512]> var_2071_cast_fp16 = mul(x = q_27, y = cos_f)[name = string("op_2071_cast_fp16")]; |
| tensor<int32, [2]> var_2072_split_sizes_0 = const()[name = string("op_2072_split_sizes_0"), val = tensor<int32, [2]>([256, 256])]; |
| int32 var_2072_axis_0 = const()[name = string("op_2072_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 1, 256]> var_2072_0, tensor<fp16, [1, 8, 1, 256]> var_2072_1 = split(axis = var_2072_axis_0, split_sizes = var_2072_split_sizes_0, x = q_27)[name = string("op_2072")]; |
| fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 1, 256]> var_2074 = mul(x = var_2072_1, y = const_34_promoted)[name = string("op_2074")]; |
| int32 var_2076 = const()[name = string("op_2076"), val = int32(-1)]; |
| bool var_2077_interleave_0 = const()[name = string("op_2077_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 512]> var_2077 = concat(axis = var_2076, interleave = var_2077_interleave_0, values = (var_2074, var_2072_0))[name = string("op_2077")]; |
| tensor<fp16, [1, 8, 1, 512]> var_2078_cast_fp16 = mul(x = var_2077, y = sin_f)[name = string("op_2078_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 512]> q_29_cast_fp16 = add(x = var_2071_cast_fp16, y = var_2078_cast_fp16)[name = string("q_29_cast_fp16")]; |
| tensor<int32, [4]> transpose_16_perm_0 = const()[name = string("transpose_16_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])]; |
| tensor<int32, [4]> tile_8_reps_0 = const()[name = string("tile_8_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])]; |
| tensor<fp16, [1, 1, 2048, 512]> transpose_16_cast_fp16 = transpose(perm = transpose_16_perm_0, x = kv14_k)[name = string("transpose_65")]; |
| tensor<fp16, [8, 1, 2048, 512]> tile_8_cast_fp16 = tile(reps = tile_8_reps_0, x = transpose_16_cast_fp16)[name = string("tile_8_cast_fp16")]; |
| tensor<int32, [5]> concat_16 = const()[name = string("concat_16"), val = tensor<int32, [5]>([8, 1, 1, 2048, 512])]; |
| tensor<fp16, [8, 1, 1, 2048, 512]> reshape_16_cast_fp16 = reshape(shape = concat_16, x = tile_8_cast_fp16)[name = string("reshape_16_cast_fp16")]; |
| tensor<int32, [5]> transpose_17_perm_0 = const()[name = string("transpose_17_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])]; |
| tensor<int32, [4]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [4]>([-1, 1, 2048, 512])]; |
| tensor<fp16, [1, 8, 1, 2048, 512]> transpose_17_cast_fp16 = transpose(perm = transpose_17_perm_0, x = reshape_16_cast_fp16)[name = string("transpose_64")]; |
| tensor<fp16, [8, 1, 2048, 512]> reshape_17_cast_fp16 = reshape(shape = concat_17, x = transpose_17_cast_fp16)[name = string("reshape_17_cast_fp16")]; |
| tensor<int32, [4]> transpose_44_perm_0 = const()[name = string("transpose_44_perm_0"), val = tensor<int32, [4]>([1, 0, -1, -2])]; |
| tensor<int32, [4]> transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])]; |
| tensor<int32, [4]> tile_9_reps_0 = const()[name = string("tile_9_reps_0"), val = tensor<int32, [4]>([8, 1, 1, 1])]; |
| tensor<fp16, [1, 1, 2048, 512]> transpose_18_cast_fp16 = transpose(perm = transpose_18_perm_0, x = kv14_v)[name = string("transpose_63")]; |
| tensor<fp16, [8, 1, 2048, 512]> tile_9_cast_fp16 = tile(reps = tile_9_reps_0, x = transpose_18_cast_fp16)[name = string("tile_9_cast_fp16")]; |
| tensor<int32, [5]> concat_18 = const()[name = string("concat_18"), val = tensor<int32, [5]>([8, 1, 1, 2048, 512])]; |
| tensor<fp16, [8, 1, 1, 2048, 512]> reshape_18_cast_fp16 = reshape(shape = concat_18, x = tile_9_cast_fp16)[name = string("reshape_18_cast_fp16")]; |
| tensor<int32, [5]> transpose_19_perm_0 = const()[name = string("transpose_19_perm_0"), val = tensor<int32, [5]>([1, 0, 2, 3, 4])]; |
| tensor<int32, [4]> concat_19 = const()[name = string("concat_19"), val = tensor<int32, [4]>([-1, 1, 2048, 512])]; |
| tensor<fp16, [1, 8, 1, 2048, 512]> transpose_19_cast_fp16 = transpose(perm = transpose_19_perm_0, x = reshape_18_cast_fp16)[name = string("transpose_62")]; |
| tensor<fp16, [8, 1, 2048, 512]> reshape_19_cast_fp16 = reshape(shape = concat_19, x = transpose_19_cast_fp16)[name = string("reshape_19_cast_fp16")]; |
| tensor<int32, [4]> V_expanded_9_perm_0 = const()[name = string("V_expanded_9_perm_0"), val = tensor<int32, [4]>([1, 0, -2, -1])]; |
| bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; |
| bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 512, 2048]> transpose_44_cast_fp16 = transpose(perm = transpose_44_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_61")]; |
| tensor<fp16, [1, 8, 1, 2048]> attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = q_29_cast_fp16, y = transpose_44_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 2048]> x_67_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask_full)[name = string("x_67_cast_fp16")]; |
| tensor<int32, [1]> reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> reduce_max_4 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_67_cast_fp16)[name = string("reduce_max_4")]; |
| tensor<fp16, [1, 8, 1, 2048]> var_2110 = sub(x = x_67_cast_fp16, y = reduce_max_4)[name = string("op_2110")]; |
| tensor<fp16, [1, 8, 1, 2048]> var_2116 = exp(x = var_2110)[name = string("op_2116")]; |
| tensor<int32, [1]> var_2126_axes_0 = const()[name = string("op_2126_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool var_2126_keep_dims_0 = const()[name = string("op_2126_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> var_2126 = reduce_sum(axes = var_2126_axes_0, keep_dims = var_2126_keep_dims_0, x = var_2116)[name = string("op_2126")]; |
| tensor<fp16, [1, 8, 1, 2048]> var_2132_cast_fp16 = real_div(x = var_2116, y = var_2126)[name = string("op_2132_cast_fp16")]; |
| bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; |
| bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 2048, 512]> V_expanded_9_cast_fp16 = transpose(perm = V_expanded_9_perm_0, x = reshape_19_cast_fp16)[name = string("transpose_60")]; |
| tensor<fp16, [1, 8, 1, 512]> attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = var_2132_cast_fp16, y = V_expanded_9_cast_fp16)[name = string("attn_output_25_cast_fp16")]; |
| tensor<int32, [4]> var_2143 = const()[name = string("op_2143"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_2150 = const()[name = string("op_2150"), val = tensor<int32, [3]>([1, 1, -1])]; |
| tensor<fp16, [1, 1, 8, 512]> var_2144_cast_fp16 = transpose(perm = var_2143, x = attn_output_25_cast_fp16)[name = string("transpose_59")]; |
| tensor<fp16, [1, 1, 4096]> attn_output_27_cast_fp16 = reshape(shape = var_2150, x = var_2144_cast_fp16)[name = string("attn_output_27_cast_fp16")]; |
| tensor<int32, [3]> var_2155 = const()[name = string("op_2155"), val = tensor<int32, [3]>([0, 2, 1])]; |
| string var_2171_pad_type_0 = const()[name = string("op_2171_pad_type_0"), val = string("valid")]; |
| int32 var_2171_groups_0 = const()[name = string("op_2171_groups_0"), val = int32(1)]; |
| tensor<int32, [1]> var_2171_strides_0 = const()[name = string("op_2171_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> var_2171_pad_0 = const()[name = string("op_2171_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> var_2171_dilations_0 = const()[name = string("op_2171_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<fp16, [1536, 4096, 1]> squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 4096, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311428224))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314574016))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; |
| tensor<fp16, [1, 4096, 1]> var_2156_cast_fp16 = transpose(perm = var_2155, x = attn_output_27_cast_fp16)[name = string("transpose_58")]; |
| tensor<fp16, [1, 1536, 1]> var_2171_cast_fp16 = conv(dilations = var_2171_dilations_0, groups = var_2171_groups_0, pad = var_2171_pad_0, pad_type = var_2171_pad_type_0, strides = var_2171_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_2156_cast_fp16)[name = string("op_2171_cast_fp16")]; |
| tensor<int32, [3]> var_2175 = const()[name = string("op_2175"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_2181 = const()[name = string("op_2181"), val = int32(-1)]; |
| fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_71_cast_fp16 = transpose(perm = var_2175, x = var_2171_cast_fp16)[name = string("transpose_57")]; |
| tensor<fp16, [1, 1, 1536]> var_2183_cast_fp16 = mul(x = x_71_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_2183_cast_fp16")]; |
| bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_105_cast_fp16 = concat(axis = var_2181, interleave = input_105_interleave_0, values = (x_71_cast_fp16, var_2183_cast_fp16))[name = string("input_105_cast_fp16")]; |
| tensor<int32, [1]> normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_2178_to_fp16 = const()[name = string("op_2178_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_2178_to_fp16, x = input_105_cast_fp16)[name = string("normed_105_cast_fp16")]; |
| tensor<int32, [2]> var_2188_split_sizes_0 = const()[name = string("op_2188_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_2188_axis_0 = const()[name = string("op_2188_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_2188_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2188_cast_fp16_1 = split(axis = var_2188_axis_0, split_sizes = var_2188_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_2188_cast_fp16")]; |
| tensor<fp16, [1536]> layers_4_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314575616)))]; |
| tensor<fp16, [1, 1, 1536]> attn_output_29_cast_fp16 = mul(x = var_2188_cast_fp16_0, y = layers_4_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_29_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> x_73_cast_fp16 = add(x = x_63_cast_fp16, y = attn_output_29_cast_fp16)[name = string("x_73_cast_fp16")]; |
| int32 var_2197 = const()[name = string("op_2197"), val = int32(-1)]; |
| fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_2199_cast_fp16 = mul(x = x_73_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_2199_cast_fp16")]; |
| bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_107_cast_fp16 = concat(axis = var_2197, interleave = input_107_interleave_0, values = (x_73_cast_fp16, var_2199_cast_fp16))[name = string("input_107_cast_fp16")]; |
| tensor<int32, [1]> normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_2194_to_fp16 = const()[name = string("op_2194_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_2194_to_fp16, x = input_107_cast_fp16)[name = string("normed_109_cast_fp16")]; |
| tensor<int32, [2]> var_2204_split_sizes_0 = const()[name = string("op_2204_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_2204_axis_0 = const()[name = string("op_2204_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_2204_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2204_cast_fp16_1 = split(axis = var_2204_axis_0, split_sizes = var_2204_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_2204_cast_fp16")]; |
| tensor<fp16, [1536]> layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314578752)))]; |
| tensor<fp16, [1, 1, 1536]> h_27_cast_fp16 = mul(x = var_2204_cast_fp16_0, y = layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_27_cast_fp16")]; |
| tensor<int32, [3]> var_2215 = const()[name = string("op_2215"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_2216 = transpose(perm = var_2215, x = h_27_cast_fp16)[name = string("transpose_56")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_109 = expand_dims(axes = input_109_axes_0, x = var_2216)[name = string("input_109")]; |
| string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gate_17_groups_0 = const()[name = string("gate_17_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_109)[name = string("gate_17")]; |
| string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> up_9_strides_0 = const()[name = string("up_9_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> up_9_pad_0 = const()[name = string("up_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> up_9 = conv(dilations = up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_109)[name = string("up_9")]; |
| string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = string("gate_19")]; |
| tensor<fp16, [1, 12288, 1, 1]> input_111 = mul(x = gate_19, y = up_9)[name = string("input_111")]; |
| string mlp_out_9_pad_type_0 = const()[name = string("mlp_out_9_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> mlp_out_9_strides_0 = const()[name = string("mlp_out_9_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> mlp_out_9_pad_0 = const()[name = string("mlp_out_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> mlp_out_9_dilations_0 = const()[name = string("mlp_out_9_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 mlp_out_9_groups_0 = const()[name = string("mlp_out_9_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 1536, 1, 1]> mlp_out_9 = conv(dilations = mlp_out_9_dilations_0, groups = mlp_out_9_groups_0, pad = mlp_out_9_pad_0, pad_type = mlp_out_9_pad_type_0, strides = mlp_out_9_strides_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_111)[name = string("mlp_out_9")]; |
| tensor<int32, [1]> var_2256_axes_0 = const()[name = string("op_2256_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_2256 = squeeze(axes = var_2256_axes_0, x = mlp_out_9)[name = string("op_2256")]; |
| tensor<int32, [3]> var_2260 = const()[name = string("op_2260"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_2266 = const()[name = string("op_2266"), val = int32(-1)]; |
| fp16 const_37_promoted = const()[name = string("const_37_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_75 = transpose(perm = var_2260, x = var_2256)[name = string("transpose_55")]; |
| tensor<fp16, [1, 1, 1536]> var_2268 = mul(x = x_75, y = const_37_promoted)[name = string("op_2268")]; |
| bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_113 = concat(axis = var_2266, interleave = input_113_interleave_0, values = (x_75, var_2268))[name = string("input_113")]; |
| tensor<int32, [1]> normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_2263_to_fp16 = const()[name = string("op_2263_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_2263_to_fp16, x = input_113)[name = string("normed_113_cast_fp16")]; |
| tensor<int32, [2]> var_2273_split_sizes_0 = const()[name = string("op_2273_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_2273_axis_0 = const()[name = string("op_2273_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_2273_0, tensor<fp16, [1, 1, 1536]> var_2273_1 = split(axis = var_2273_axis_0, split_sizes = var_2273_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_2273")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_43 = mul(x = var_2273_0, y = layers_4_post_feedforward_layernorm_weight)[name = string("hidden_states_43")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_45_cast_fp16 = add(x = x_73_cast_fp16, y = hidden_states_43)[name = string("hidden_states_45_cast_fp16")]; |
| tensor<int32, [3]> per_layer_slice_9_begin_0 = const()[name = string("per_layer_slice_9_begin_0"), val = tensor<int32, [3]>([0, 0, 7424])]; |
| tensor<int32, [3]> per_layer_slice_9_end_0 = const()[name = string("per_layer_slice_9_end_0"), val = tensor<int32, [3]>([1, 1, 7680])]; |
| tensor<bool, [3]> per_layer_slice_9_end_mask_0 = const()[name = string("per_layer_slice_9_end_mask_0"), val = tensor<bool, [3]>([true, true, false])]; |
| tensor<fp16, [1, 1, 256]> per_layer_slice_9_cast_fp16 = slice_by_index(begin = per_layer_slice_9_begin_0, end = per_layer_slice_9_end_0, end_mask = per_layer_slice_9_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_9_cast_fp16")]; |
| tensor<int32, [3]> var_2301 = const()[name = string("op_2301"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_2302 = transpose(perm = var_2301, x = hidden_states_45_cast_fp16)[name = string("transpose_54")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_115 = expand_dims(axes = input_115_axes_0, x = var_2302)[name = string("input_115")]; |
| string gated_25_pad_type_0 = const()[name = string("gated_25_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_25_strides_0 = const()[name = string("gated_25_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_25_pad_0 = const()[name = string("gated_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_25_dilations_0 = const()[name = string("gated_25_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_25_groups_0 = const()[name = string("gated_25_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 256, 1, 1]> gated_25 = conv(dilations = gated_25_dilations_0, groups = gated_25_groups_0, pad = gated_25_pad_0, pad_type = gated_25_pad_type_0, strides = gated_25_strides_0, weight = layers_4_per_layer_input_gate_weight_palettized, x = input_115)[name = string("gated_25")]; |
| string gated_27_mode_0 = const()[name = string("gated_27_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 256, 1, 1]> gated_27 = gelu(mode = gated_27_mode_0, x = gated_25)[name = string("gated_27")]; |
| tensor<int32, [3]> var_2321 = const()[name = string("op_2321"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> per_layer_slice_conv_9_axes_0 = const()[name = string("per_layer_slice_conv_9_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 256, 1]> var_2322_cast_fp16 = transpose(perm = var_2321, x = per_layer_slice_9_cast_fp16)[name = string("transpose_53")]; |
| tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_9_cast_fp16 = expand_dims(axes = per_layer_slice_conv_9_axes_0, x = var_2322_cast_fp16)[name = string("per_layer_slice_conv_9_cast_fp16")]; |
| tensor<fp16, [1, 256, 1, 1]> input_117_cast_fp16 = mul(x = gated_27, y = per_layer_slice_conv_9_cast_fp16)[name = string("input_117_cast_fp16")]; |
| string gated_29_pad_type_0 = const()[name = string("gated_29_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_29_strides_0 = const()[name = string("gated_29_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_29_pad_0 = const()[name = string("gated_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_29_dilations_0 = const()[name = string("gated_29_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_29_groups_0 = const()[name = string("gated_29_groups_0"), val = int32(1)]; |
| tensor<fp16, [1536, 256, 1, 1]> layers_4_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314581888))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314778560))))[name = string("layers_4_per_layer_projection_weight_promoted_to_fp16_palettized")]; |
| tensor<fp16, [1, 1536, 1, 1]> gated_29_cast_fp16 = conv(dilations = gated_29_dilations_0, groups = gated_29_groups_0, pad = gated_29_pad_0, pad_type = gated_29_pad_type_0, strides = gated_29_strides_0, weight = layers_4_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("gated_29_cast_fp16")]; |
| tensor<int32, [1]> var_2338_axes_0 = const()[name = string("op_2338_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_2338_cast_fp16 = squeeze(axes = var_2338_axes_0, x = gated_29_cast_fp16)[name = string("op_2338_cast_fp16")]; |
| tensor<int32, [3]> var_2342 = const()[name = string("op_2342"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_2348 = const()[name = string("op_2348"), val = int32(-1)]; |
| fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_77_cast_fp16 = transpose(perm = var_2342, x = var_2338_cast_fp16)[name = string("transpose_52")]; |
| tensor<fp16, [1, 1, 1536]> var_2350_cast_fp16 = mul(x = x_77_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2350_cast_fp16")]; |
| bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_119_cast_fp16 = concat(axis = var_2348, interleave = input_119_interleave_0, values = (x_77_cast_fp16, var_2350_cast_fp16))[name = string("input_119_cast_fp16")]; |
| tensor<int32, [1]> normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_2345_to_fp16 = const()[name = string("op_2345_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_2345_to_fp16, x = input_119_cast_fp16)[name = string("normed_117_cast_fp16")]; |
| tensor<int32, [2]> var_2355_split_sizes_0 = const()[name = string("op_2355_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_2355_axis_0 = const()[name = string("op_2355_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_2355_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2355_cast_fp16_1 = split(axis = var_2355_axis_0, split_sizes = var_2355_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_2355_cast_fp16")]; |
| tensor<fp16, [1536]> layers_4_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314780160)))]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_49_cast_fp16 = mul(x = var_2355_cast_fp16_0, y = layers_4_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_49_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_51_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = hidden_states_49_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; |
| tensor<fp16, [1]> const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.ap-1])]; |
| tensor<fp16, [1, 1, 1536]> x_79_cast_fp16 = mul(x = hidden_states_51_cast_fp16, y = const_39_promoted_to_fp16)[name = string("x_79_cast_fp16")]; |
| int32 var_2370 = const()[name = string("op_2370"), val = int32(-1)]; |
| fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_2372_cast_fp16 = mul(x = x_79_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_2372_cast_fp16")]; |
| bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_121_cast_fp16 = concat(axis = var_2370, interleave = input_121_interleave_0, values = (x_79_cast_fp16, var_2372_cast_fp16))[name = string("input_121_cast_fp16")]; |
| tensor<int32, [1]> normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_2367_to_fp16 = const()[name = string("op_2367_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_2367_to_fp16, x = input_121_cast_fp16)[name = string("normed_121_cast_fp16")]; |
| tensor<int32, [2]> var_2377_split_sizes_0 = const()[name = string("op_2377_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_2377_axis_0 = const()[name = string("op_2377_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_2377_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2377_cast_fp16_1 = split(axis = var_2377_axis_0, split_sizes = var_2377_split_sizes_0, x = normed_121_cast_fp16)[name = string("op_2377_cast_fp16")]; |
| tensor<fp16, [1536]> layers_5_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314783296)))]; |
| tensor<fp16, [1, 1, 1536]> h_31_cast_fp16 = mul(x = var_2377_cast_fp16_0, y = layers_5_input_layernorm_weight_promoted_to_fp16)[name = string("h_31_cast_fp16")]; |
| tensor<int32, [3]> var_2383 = const()[name = string("op_2383"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> var_2386_axes_0 = const()[name = string("op_2386_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_2384_cast_fp16 = transpose(perm = var_2383, x = h_31_cast_fp16)[name = string("transpose_51")]; |
| tensor<fp16, [1, 1536, 1, 1]> var_2386_cast_fp16 = expand_dims(axes = var_2386_axes_0, x = var_2384_cast_fp16)[name = string("op_2386_cast_fp16")]; |
| string var_2402_pad_type_0 = const()[name = string("op_2402_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> var_2402_strides_0 = const()[name = string("op_2402_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> var_2402_pad_0 = const()[name = string("op_2402_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> var_2402_dilations_0 = const()[name = string("op_2402_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 var_2402_groups_0 = const()[name = string("op_2402_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 2048, 1, 1]> var_2402 = conv(dilations = var_2402_dilations_0, groups = var_2402_groups_0, pad = var_2402_pad_0, pad_type = var_2402_pad_type_0, strides = var_2402_strides_0, weight = layers_5_self_attn_q_proj_weight_palettized, x = var_2386_cast_fp16)[name = string("op_2402")]; |
| tensor<int32, [4]> var_2407 = const()[name = string("op_2407"), val = tensor<int32, [4]>([1, 8, 256, 1])]; |
| tensor<fp16, [1, 8, 256, 1]> var_2408 = reshape(shape = var_2407, x = var_2402)[name = string("op_2408")]; |
| tensor<int32, [4]> var_2413 = const()[name = string("op_2413"), val = tensor<int32, [4]>([0, 1, 3, 2])]; |
| tensor<int32, [3]> var_2423 = const()[name = string("op_2423"), val = tensor<int32, [3]>([1, 8, 256])]; |
| tensor<fp16, [1, 8, 1, 256]> var_2414 = transpose(perm = var_2413, x = var_2408)[name = string("transpose_50")]; |
| tensor<fp16, [1, 8, 256]> x_81 = reshape(shape = var_2423, x = var_2414)[name = string("x_81")]; |
| int32 var_2429 = const()[name = string("op_2429"), val = int32(-1)]; |
| fp16 const_41_promoted = const()[name = string("const_41_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 256]> var_2431 = mul(x = x_81, y = const_41_promoted)[name = string("op_2431")]; |
| bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 512]> input_125 = concat(axis = var_2429, interleave = input_125_interleave_0, values = (x_81, var_2431))[name = string("input_125")]; |
| tensor<int32, [1]> normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_2426_to_fp16 = const()[name = string("op_2426_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 8, 512]> normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_2426_to_fp16, x = input_125)[name = string("normed_125_cast_fp16")]; |
| tensor<int32, [2]> var_2436_split_sizes_0 = const()[name = string("op_2436_split_sizes_0"), val = tensor<int32, [2]>([256, 256])]; |
| int32 var_2436_axis_0 = const()[name = string("op_2436_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 256]> var_2436_0, tensor<fp16, [1, 8, 256]> var_2436_1 = split(axis = var_2436_axis_0, split_sizes = var_2436_split_sizes_0, x = normed_125_cast_fp16)[name = string("op_2436")]; |
| tensor<fp16, [1, 8, 256]> var_2438 = mul(x = var_2436_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_2438")]; |
| tensor<int32, [4]> var_2443 = const()[name = string("op_2443"), val = tensor<int32, [4]>([1, 8, 1, 256])]; |
| tensor<fp16, [1, 8, 1, 256]> q_33 = reshape(shape = var_2443, x = var_2438)[name = string("q_33")]; |
| tensor<fp16, [1, 8, 1, 256]> var_2445_cast_fp16 = mul(x = q_33, y = cos_s)[name = string("op_2445_cast_fp16")]; |
| tensor<int32, [2]> var_2446_split_sizes_0 = const()[name = string("op_2446_split_sizes_0"), val = tensor<int32, [2]>([128, 128])]; |
| int32 var_2446_axis_0 = const()[name = string("op_2446_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 1, 128]> var_2446_0, tensor<fp16, [1, 8, 1, 128]> var_2446_1 = split(axis = var_2446_axis_0, split_sizes = var_2446_split_sizes_0, x = q_33)[name = string("op_2446")]; |
| fp16 const_42_promoted = const()[name = string("const_42_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 1, 128]> var_2448 = mul(x = var_2446_1, y = const_42_promoted)[name = string("op_2448")]; |
| int32 var_2450 = const()[name = string("op_2450"), val = int32(-1)]; |
| bool var_2451_interleave_0 = const()[name = string("op_2451_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 256]> var_2451 = concat(axis = var_2450, interleave = var_2451_interleave_0, values = (var_2448, var_2446_0))[name = string("op_2451")]; |
| tensor<fp16, [1, 8, 1, 256]> var_2452_cast_fp16 = mul(x = var_2451, y = sin_s)[name = string("op_2452_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 256]> q_35_cast_fp16 = add(x = var_2445_cast_fp16, y = var_2452_cast_fp16)[name = string("q_35_cast_fp16")]; |
| bool attn_weights_21_transpose_x_0 = const()[name = string("attn_weights_21_transpose_x_0"), val = bool(false)]; |
| bool attn_weights_21_transpose_y_0 = const()[name = string("attn_weights_21_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 512]> attn_weights_21_cast_fp16 = matmul(transpose_x = attn_weights_21_transpose_x_0, transpose_y = attn_weights_21_transpose_y_0, x = q_35_cast_fp16, y = transpose_40_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 512]> x_83_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask_sliding)[name = string("x_83_cast_fp16")]; |
| tensor<int32, [1]> reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> reduce_max_5 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_83_cast_fp16)[name = string("reduce_max_5")]; |
| tensor<fp16, [1, 8, 1, 512]> var_2484 = sub(x = x_83_cast_fp16, y = reduce_max_5)[name = string("op_2484")]; |
| tensor<fp16, [1, 8, 1, 512]> var_2490 = exp(x = var_2484)[name = string("op_2490")]; |
| tensor<int32, [1]> var_2500_axes_0 = const()[name = string("op_2500_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool var_2500_keep_dims_0 = const()[name = string("op_2500_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> var_2500 = reduce_sum(axes = var_2500_axes_0, keep_dims = var_2500_keep_dims_0, x = var_2490)[name = string("op_2500")]; |
| tensor<fp16, [1, 8, 1, 512]> var_2506_cast_fp16 = real_div(x = var_2490, y = var_2500)[name = string("op_2506_cast_fp16")]; |
| bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; |
| bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 256]> attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = var_2506_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_31_cast_fp16")]; |
| tensor<int32, [4]> var_2517 = const()[name = string("op_2517"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_2524 = const()[name = string("op_2524"), val = tensor<int32, [3]>([1, 1, -1])]; |
| tensor<fp16, [1, 1, 8, 256]> var_2518_cast_fp16 = transpose(perm = var_2517, x = attn_output_31_cast_fp16)[name = string("transpose_49")]; |
| tensor<fp16, [1, 1, 2048]> attn_output_33_cast_fp16 = reshape(shape = var_2524, x = var_2518_cast_fp16)[name = string("attn_output_33_cast_fp16")]; |
| tensor<int32, [3]> var_2529 = const()[name = string("op_2529"), val = tensor<int32, [3]>([0, 2, 1])]; |
| string var_2545_pad_type_0 = const()[name = string("op_2545_pad_type_0"), val = string("valid")]; |
| int32 var_2545_groups_0 = const()[name = string("op_2545_groups_0"), val = int32(1)]; |
| tensor<int32, [1]> var_2545_strides_0 = const()[name = string("op_2545_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> var_2545_pad_0 = const()[name = string("op_2545_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> var_2545_dilations_0 = const()[name = string("op_2545_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<fp16, [1536, 2048, 1]> squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314786432))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316359360))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; |
| tensor<fp16, [1, 2048, 1]> var_2530_cast_fp16 = transpose(perm = var_2529, x = attn_output_33_cast_fp16)[name = string("transpose_48")]; |
| tensor<fp16, [1, 1536, 1]> var_2545_cast_fp16 = conv(dilations = var_2545_dilations_0, groups = var_2545_groups_0, pad = var_2545_pad_0, pad_type = var_2545_pad_type_0, strides = var_2545_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_2530_cast_fp16)[name = string("op_2545_cast_fp16")]; |
| tensor<int32, [3]> var_2549 = const()[name = string("op_2549"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_2555 = const()[name = string("op_2555"), val = int32(-1)]; |
| fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_87_cast_fp16 = transpose(perm = var_2549, x = var_2545_cast_fp16)[name = string("transpose_47")]; |
| tensor<fp16, [1, 1, 1536]> var_2557_cast_fp16 = mul(x = x_87_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_2557_cast_fp16")]; |
| bool input_129_interleave_0 = const()[name = string("input_129_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_129_cast_fp16 = concat(axis = var_2555, interleave = input_129_interleave_0, values = (x_87_cast_fp16, var_2557_cast_fp16))[name = string("input_129_cast_fp16")]; |
| tensor<int32, [1]> normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_2552_to_fp16 = const()[name = string("op_2552_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_2552_to_fp16, x = input_129_cast_fp16)[name = string("normed_129_cast_fp16")]; |
| tensor<int32, [2]> var_2562_split_sizes_0 = const()[name = string("op_2562_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_2562_axis_0 = const()[name = string("op_2562_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_2562_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2562_cast_fp16_1 = split(axis = var_2562_axis_0, split_sizes = var_2562_split_sizes_0, x = normed_129_cast_fp16)[name = string("op_2562_cast_fp16")]; |
| tensor<fp16, [1536]> layers_5_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316360960)))]; |
| tensor<fp16, [1, 1, 1536]> attn_output_35_cast_fp16 = mul(x = var_2562_cast_fp16_0, y = layers_5_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_35_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> x_89_cast_fp16 = add(x = x_79_cast_fp16, y = attn_output_35_cast_fp16)[name = string("x_89_cast_fp16")]; |
| int32 var_2571 = const()[name = string("op_2571"), val = int32(-1)]; |
| fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_2573_cast_fp16 = mul(x = x_89_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_2573_cast_fp16")]; |
| bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_131_cast_fp16 = concat(axis = var_2571, interleave = input_131_interleave_0, values = (x_89_cast_fp16, var_2573_cast_fp16))[name = string("input_131_cast_fp16")]; |
| tensor<int32, [1]> normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_2568_to_fp16 = const()[name = string("op_2568_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_2568_to_fp16, x = input_131_cast_fp16)[name = string("normed_133_cast_fp16")]; |
| tensor<int32, [2]> var_2578_split_sizes_0 = const()[name = string("op_2578_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_2578_axis_0 = const()[name = string("op_2578_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_2578_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2578_cast_fp16_1 = split(axis = var_2578_axis_0, split_sizes = var_2578_split_sizes_0, x = normed_133_cast_fp16)[name = string("op_2578_cast_fp16")]; |
| tensor<fp16, [1536]> layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316364096)))]; |
| tensor<fp16, [1, 1, 1536]> h_33_cast_fp16 = mul(x = var_2578_cast_fp16_0, y = layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_33_cast_fp16")]; |
| tensor<int32, [3]> var_2589 = const()[name = string("op_2589"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_133_axes_0 = const()[name = string("input_133_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_2590 = transpose(perm = var_2589, x = h_33_cast_fp16)[name = string("transpose_46")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_133 = expand_dims(axes = input_133_axes_0, x = var_2590)[name = string("input_133")]; |
| string gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, strides = gate_21_strides_0, weight = layers_5_mlp_gate_proj_weight_palettized, x = input_133)[name = string("gate_21")]; |
| string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 up_11_groups_0 = const()[name = string("up_11_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = layers_5_mlp_up_proj_weight_palettized, x = input_133)[name = string("up_11")]; |
| string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")]; |
| tensor<fp16, [1, 12288, 1, 1]> input_135 = mul(x = gate_23, y = up_11)[name = string("input_135")]; |
| string mlp_out_11_pad_type_0 = const()[name = string("mlp_out_11_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> mlp_out_11_strides_0 = const()[name = string("mlp_out_11_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> mlp_out_11_pad_0 = const()[name = string("mlp_out_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> mlp_out_11_dilations_0 = const()[name = string("mlp_out_11_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 mlp_out_11_groups_0 = const()[name = string("mlp_out_11_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 1536, 1, 1]> mlp_out_11 = conv(dilations = mlp_out_11_dilations_0, groups = mlp_out_11_groups_0, pad = mlp_out_11_pad_0, pad_type = mlp_out_11_pad_type_0, strides = mlp_out_11_strides_0, weight = layers_5_mlp_down_proj_weight_palettized, x = input_135)[name = string("mlp_out_11")]; |
| tensor<int32, [1]> var_2630_axes_0 = const()[name = string("op_2630_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_2630 = squeeze(axes = var_2630_axes_0, x = mlp_out_11)[name = string("op_2630")]; |
| tensor<int32, [3]> var_2634 = const()[name = string("op_2634"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_2640 = const()[name = string("op_2640"), val = int32(-1)]; |
| fp16 const_45_promoted = const()[name = string("const_45_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_91 = transpose(perm = var_2634, x = var_2630)[name = string("transpose_45")]; |
| tensor<fp16, [1, 1, 1536]> var_2642 = mul(x = x_91, y = const_45_promoted)[name = string("op_2642")]; |
| bool input_137_interleave_0 = const()[name = string("input_137_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_137 = concat(axis = var_2640, interleave = input_137_interleave_0, values = (x_91, var_2642))[name = string("input_137")]; |
| tensor<int32, [1]> normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_2637_to_fp16 = const()[name = string("op_2637_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_2637_to_fp16, x = input_137)[name = string("normed_137_cast_fp16")]; |
| tensor<int32, [2]> var_2647_split_sizes_0 = const()[name = string("op_2647_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_2647_axis_0 = const()[name = string("op_2647_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_2647_0, tensor<fp16, [1, 1, 1536]> var_2647_1 = split(axis = var_2647_axis_0, split_sizes = var_2647_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_2647")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_53 = mul(x = var_2647_0, y = layers_5_post_feedforward_layernorm_weight)[name = string("hidden_states_53")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_55_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_53)[name = string("hidden_states_55_cast_fp16")]; |
| tensor<int32, [3]> per_layer_slice_11_begin_0 = const()[name = string("per_layer_slice_11_begin_0"), val = tensor<int32, [3]>([0, 0, 7680])]; |
| tensor<int32, [3]> per_layer_slice_11_end_0 = const()[name = string("per_layer_slice_11_end_0"), val = tensor<int32, [3]>([1, 1, 7936])]; |
| tensor<bool, [3]> per_layer_slice_11_end_mask_0 = const()[name = string("per_layer_slice_11_end_mask_0"), val = tensor<bool, [3]>([true, true, false])]; |
| tensor<fp16, [1, 1, 256]> per_layer_slice_11_cast_fp16 = slice_by_index(begin = per_layer_slice_11_begin_0, end = per_layer_slice_11_end_0, end_mask = per_layer_slice_11_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_11_cast_fp16")]; |
| tensor<int32, [3]> var_2675 = const()[name = string("op_2675"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_2676 = transpose(perm = var_2675, x = hidden_states_55_cast_fp16)[name = string("transpose_44")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_139 = expand_dims(axes = input_139_axes_0, x = var_2676)[name = string("input_139")]; |
| string gated_31_pad_type_0 = const()[name = string("gated_31_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_31_strides_0 = const()[name = string("gated_31_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_31_pad_0 = const()[name = string("gated_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_31_dilations_0 = const()[name = string("gated_31_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_31_groups_0 = const()[name = string("gated_31_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 256, 1, 1]> gated_31 = conv(dilations = gated_31_dilations_0, groups = gated_31_groups_0, pad = gated_31_pad_0, pad_type = gated_31_pad_type_0, strides = gated_31_strides_0, weight = layers_5_per_layer_input_gate_weight_palettized, x = input_139)[name = string("gated_31")]; |
| string gated_33_mode_0 = const()[name = string("gated_33_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 256, 1, 1]> gated_33 = gelu(mode = gated_33_mode_0, x = gated_31)[name = string("gated_33")]; |
| tensor<int32, [3]> var_2695 = const()[name = string("op_2695"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> per_layer_slice_conv_11_axes_0 = const()[name = string("per_layer_slice_conv_11_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 256, 1]> var_2696_cast_fp16 = transpose(perm = var_2695, x = per_layer_slice_11_cast_fp16)[name = string("transpose_43")]; |
| tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_11_cast_fp16 = expand_dims(axes = per_layer_slice_conv_11_axes_0, x = var_2696_cast_fp16)[name = string("per_layer_slice_conv_11_cast_fp16")]; |
| tensor<fp16, [1, 256, 1, 1]> input_141_cast_fp16 = mul(x = gated_33, y = per_layer_slice_conv_11_cast_fp16)[name = string("input_141_cast_fp16")]; |
| string gated_35_pad_type_0 = const()[name = string("gated_35_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_35_strides_0 = const()[name = string("gated_35_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_35_pad_0 = const()[name = string("gated_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_35_dilations_0 = const()[name = string("gated_35_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_35_groups_0 = const()[name = string("gated_35_groups_0"), val = int32(1)]; |
| tensor<fp16, [1536, 256, 1, 1]> layers_5_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316367232))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316563904))))[name = string("layers_5_per_layer_projection_weight_promoted_to_fp16_palettized")]; |
| tensor<fp16, [1, 1536, 1, 1]> gated_35_cast_fp16 = conv(dilations = gated_35_dilations_0, groups = gated_35_groups_0, pad = gated_35_pad_0, pad_type = gated_35_pad_type_0, strides = gated_35_strides_0, weight = layers_5_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_141_cast_fp16)[name = string("gated_35_cast_fp16")]; |
| tensor<int32, [1]> var_2712_axes_0 = const()[name = string("op_2712_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_2712_cast_fp16 = squeeze(axes = var_2712_axes_0, x = gated_35_cast_fp16)[name = string("op_2712_cast_fp16")]; |
| tensor<int32, [3]> var_2716 = const()[name = string("op_2716"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_2722 = const()[name = string("op_2722"), val = int32(-1)]; |
| fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_93_cast_fp16 = transpose(perm = var_2716, x = var_2712_cast_fp16)[name = string("transpose_42")]; |
| tensor<fp16, [1, 1, 1536]> var_2724_cast_fp16 = mul(x = x_93_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_2724_cast_fp16")]; |
| bool input_143_interleave_0 = const()[name = string("input_143_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_143_cast_fp16 = concat(axis = var_2722, interleave = input_143_interleave_0, values = (x_93_cast_fp16, var_2724_cast_fp16))[name = string("input_143_cast_fp16")]; |
| tensor<int32, [1]> normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_2719_to_fp16 = const()[name = string("op_2719_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_2719_to_fp16, x = input_143_cast_fp16)[name = string("normed_141_cast_fp16")]; |
| tensor<int32, [2]> var_2729_split_sizes_0 = const()[name = string("op_2729_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_2729_axis_0 = const()[name = string("op_2729_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_2729_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2729_cast_fp16_1 = split(axis = var_2729_axis_0, split_sizes = var_2729_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_2729_cast_fp16")]; |
| tensor<fp16, [1536]> layers_5_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316565504)))]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_59_cast_fp16 = mul(x = var_2729_cast_fp16_0, y = layers_5_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_59_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_61_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; |
| tensor<fp16, [1]> const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.bep-1])]; |
| tensor<fp16, [1, 1, 1536]> x_95_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_47_promoted_to_fp16)[name = string("x_95_cast_fp16")]; |
| int32 var_2744 = const()[name = string("op_2744"), val = int32(-1)]; |
| fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_2746_cast_fp16 = mul(x = x_95_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_2746_cast_fp16")]; |
| bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_145_cast_fp16 = concat(axis = var_2744, interleave = input_145_interleave_0, values = (x_95_cast_fp16, var_2746_cast_fp16))[name = string("input_145_cast_fp16")]; |
| tensor<int32, [1]> normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_2741_to_fp16 = const()[name = string("op_2741_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_2741_to_fp16, x = input_145_cast_fp16)[name = string("normed_145_cast_fp16")]; |
| tensor<int32, [2]> var_2751_split_sizes_0 = const()[name = string("op_2751_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_2751_axis_0 = const()[name = string("op_2751_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_2751_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2751_cast_fp16_1 = split(axis = var_2751_axis_0, split_sizes = var_2751_split_sizes_0, x = normed_145_cast_fp16)[name = string("op_2751_cast_fp16")]; |
| tensor<fp16, [1536]> layers_6_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316568640)))]; |
| tensor<fp16, [1, 1, 1536]> h_37_cast_fp16 = mul(x = var_2751_cast_fp16_0, y = layers_6_input_layernorm_weight_promoted_to_fp16)[name = string("h_37_cast_fp16")]; |
| tensor<int32, [3]> var_2757 = const()[name = string("op_2757"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> var_2760_axes_0 = const()[name = string("op_2760_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_2758_cast_fp16 = transpose(perm = var_2757, x = h_37_cast_fp16)[name = string("transpose_41")]; |
| tensor<fp16, [1, 1536, 1, 1]> var_2760_cast_fp16 = expand_dims(axes = var_2760_axes_0, x = var_2758_cast_fp16)[name = string("op_2760_cast_fp16")]; |
| string var_2776_pad_type_0 = const()[name = string("op_2776_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> var_2776_strides_0 = const()[name = string("op_2776_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> var_2776_pad_0 = const()[name = string("op_2776_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> var_2776_dilations_0 = const()[name = string("op_2776_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 var_2776_groups_0 = const()[name = string("op_2776_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 2048, 1, 1]> var_2776 = conv(dilations = var_2776_dilations_0, groups = var_2776_groups_0, pad = var_2776_pad_0, pad_type = var_2776_pad_type_0, strides = var_2776_strides_0, weight = layers_6_self_attn_q_proj_weight_palettized, x = var_2760_cast_fp16)[name = string("op_2776")]; |
| tensor<int32, [4]> var_2781 = const()[name = string("op_2781"), val = tensor<int32, [4]>([1, 8, 256, 1])]; |
| tensor<fp16, [1, 8, 256, 1]> var_2782 = reshape(shape = var_2781, x = var_2776)[name = string("op_2782")]; |
| tensor<int32, [4]> var_2787 = const()[name = string("op_2787"), val = tensor<int32, [4]>([0, 1, 3, 2])]; |
| tensor<int32, [3]> var_2797 = const()[name = string("op_2797"), val = tensor<int32, [3]>([1, 8, 256])]; |
| tensor<fp16, [1, 8, 1, 256]> var_2788 = transpose(perm = var_2787, x = var_2782)[name = string("transpose_40")]; |
| tensor<fp16, [1, 8, 256]> x_97 = reshape(shape = var_2797, x = var_2788)[name = string("x_97")]; |
| int32 var_2803 = const()[name = string("op_2803"), val = int32(-1)]; |
| fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 256]> var_2805 = mul(x = x_97, y = const_49_promoted)[name = string("op_2805")]; |
| bool input_149_interleave_0 = const()[name = string("input_149_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 512]> input_149 = concat(axis = var_2803, interleave = input_149_interleave_0, values = (x_97, var_2805))[name = string("input_149")]; |
| tensor<int32, [1]> normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_2800_to_fp16 = const()[name = string("op_2800_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 8, 512]> normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_2800_to_fp16, x = input_149)[name = string("normed_149_cast_fp16")]; |
| tensor<int32, [2]> var_2810_split_sizes_0 = const()[name = string("op_2810_split_sizes_0"), val = tensor<int32, [2]>([256, 256])]; |
| int32 var_2810_axis_0 = const()[name = string("op_2810_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 256]> var_2810_0, tensor<fp16, [1, 8, 256]> var_2810_1 = split(axis = var_2810_axis_0, split_sizes = var_2810_split_sizes_0, x = normed_149_cast_fp16)[name = string("op_2810")]; |
| tensor<fp16, [1, 8, 256]> var_2812 = mul(x = var_2810_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_2812")]; |
| tensor<int32, [4]> var_2817 = const()[name = string("op_2817"), val = tensor<int32, [4]>([1, 8, 1, 256])]; |
| tensor<fp16, [1, 8, 1, 256]> q_39 = reshape(shape = var_2817, x = var_2812)[name = string("q_39")]; |
| tensor<fp16, [1, 8, 1, 256]> var_2819_cast_fp16 = mul(x = q_39, y = cos_s)[name = string("op_2819_cast_fp16")]; |
| tensor<int32, [2]> var_2820_split_sizes_0 = const()[name = string("op_2820_split_sizes_0"), val = tensor<int32, [2]>([128, 128])]; |
| int32 var_2820_axis_0 = const()[name = string("op_2820_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 1, 128]> var_2820_0, tensor<fp16, [1, 8, 1, 128]> var_2820_1 = split(axis = var_2820_axis_0, split_sizes = var_2820_split_sizes_0, x = q_39)[name = string("op_2820")]; |
| fp16 const_50_promoted = const()[name = string("const_50_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 1, 128]> var_2822 = mul(x = var_2820_1, y = const_50_promoted)[name = string("op_2822")]; |
| int32 var_2824 = const()[name = string("op_2824"), val = int32(-1)]; |
| bool var_2825_interleave_0 = const()[name = string("op_2825_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 256]> var_2825 = concat(axis = var_2824, interleave = var_2825_interleave_0, values = (var_2822, var_2820_0))[name = string("op_2825")]; |
| tensor<fp16, [1, 8, 1, 256]> var_2826_cast_fp16 = mul(x = var_2825, y = sin_s)[name = string("op_2826_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 256]> q_41_cast_fp16 = add(x = var_2819_cast_fp16, y = var_2826_cast_fp16)[name = string("q_41_cast_fp16")]; |
| bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; |
| bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 512]> attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = q_41_cast_fp16, y = transpose_40_cast_fp16)[name = string("attn_weights_25_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 512]> x_99_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask_sliding)[name = string("x_99_cast_fp16")]; |
| tensor<int32, [1]> reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> reduce_max_6 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_99_cast_fp16)[name = string("reduce_max_6")]; |
| tensor<fp16, [1, 8, 1, 512]> var_2858 = sub(x = x_99_cast_fp16, y = reduce_max_6)[name = string("op_2858")]; |
| tensor<fp16, [1, 8, 1, 512]> var_2864 = exp(x = var_2858)[name = string("op_2864")]; |
| tensor<int32, [1]> var_2874_axes_0 = const()[name = string("op_2874_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool var_2874_keep_dims_0 = const()[name = string("op_2874_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> var_2874 = reduce_sum(axes = var_2874_axes_0, keep_dims = var_2874_keep_dims_0, x = var_2864)[name = string("op_2874")]; |
| tensor<fp16, [1, 8, 1, 512]> var_2880_cast_fp16 = real_div(x = var_2864, y = var_2874)[name = string("op_2880_cast_fp16")]; |
| bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; |
| bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 256]> attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = var_2880_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_37_cast_fp16")]; |
| tensor<int32, [4]> var_2891 = const()[name = string("op_2891"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_2898 = const()[name = string("op_2898"), val = tensor<int32, [3]>([1, 1, -1])]; |
| tensor<fp16, [1, 1, 8, 256]> var_2892_cast_fp16 = transpose(perm = var_2891, x = attn_output_37_cast_fp16)[name = string("transpose_39")]; |
| tensor<fp16, [1, 1, 2048]> attn_output_39_cast_fp16 = reshape(shape = var_2898, x = var_2892_cast_fp16)[name = string("attn_output_39_cast_fp16")]; |
| tensor<int32, [3]> var_2903 = const()[name = string("op_2903"), val = tensor<int32, [3]>([0, 2, 1])]; |
| string var_2919_pad_type_0 = const()[name = string("op_2919_pad_type_0"), val = string("valid")]; |
| int32 var_2919_groups_0 = const()[name = string("op_2919_groups_0"), val = int32(1)]; |
| tensor<int32, [1]> var_2919_strides_0 = const()[name = string("op_2919_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> var_2919_pad_0 = const()[name = string("op_2919_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> var_2919_dilations_0 = const()[name = string("op_2919_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<fp16, [1536, 2048, 1]> squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316571776))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318144704))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; |
| tensor<fp16, [1, 2048, 1]> var_2904_cast_fp16 = transpose(perm = var_2903, x = attn_output_39_cast_fp16)[name = string("transpose_38")]; |
| tensor<fp16, [1, 1536, 1]> var_2919_cast_fp16 = conv(dilations = var_2919_dilations_0, groups = var_2919_groups_0, pad = var_2919_pad_0, pad_type = var_2919_pad_type_0, strides = var_2919_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_2904_cast_fp16)[name = string("op_2919_cast_fp16")]; |
| tensor<int32, [3]> var_2923 = const()[name = string("op_2923"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_2929 = const()[name = string("op_2929"), val = int32(-1)]; |
| fp16 const_51_promoted_to_fp16 = const()[name = string("const_51_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_103_cast_fp16 = transpose(perm = var_2923, x = var_2919_cast_fp16)[name = string("transpose_37")]; |
| tensor<fp16, [1, 1, 1536]> var_2931_cast_fp16 = mul(x = x_103_cast_fp16, y = const_51_promoted_to_fp16)[name = string("op_2931_cast_fp16")]; |
| bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_153_cast_fp16 = concat(axis = var_2929, interleave = input_153_interleave_0, values = (x_103_cast_fp16, var_2931_cast_fp16))[name = string("input_153_cast_fp16")]; |
| tensor<int32, [1]> normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_2926_to_fp16 = const()[name = string("op_2926_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_2926_to_fp16, x = input_153_cast_fp16)[name = string("normed_153_cast_fp16")]; |
| tensor<int32, [2]> var_2936_split_sizes_0 = const()[name = string("op_2936_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_2936_axis_0 = const()[name = string("op_2936_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_2936_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2936_cast_fp16_1 = split(axis = var_2936_axis_0, split_sizes = var_2936_split_sizes_0, x = normed_153_cast_fp16)[name = string("op_2936_cast_fp16")]; |
| tensor<fp16, [1536]> layers_6_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318146304)))]; |
| tensor<fp16, [1, 1, 1536]> attn_output_41_cast_fp16 = mul(x = var_2936_cast_fp16_0, y = layers_6_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_41_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> x_105_cast_fp16 = add(x = x_95_cast_fp16, y = attn_output_41_cast_fp16)[name = string("x_105_cast_fp16")]; |
| int32 var_2945 = const()[name = string("op_2945"), val = int32(-1)]; |
| fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_2947_cast_fp16 = mul(x = x_105_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_2947_cast_fp16")]; |
| bool input_155_interleave_0 = const()[name = string("input_155_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_155_cast_fp16 = concat(axis = var_2945, interleave = input_155_interleave_0, values = (x_105_cast_fp16, var_2947_cast_fp16))[name = string("input_155_cast_fp16")]; |
| tensor<int32, [1]> normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_2942_to_fp16 = const()[name = string("op_2942_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_2942_to_fp16, x = input_155_cast_fp16)[name = string("normed_157_cast_fp16")]; |
| tensor<int32, [2]> var_2952_split_sizes_0 = const()[name = string("op_2952_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_2952_axis_0 = const()[name = string("op_2952_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_2952_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_2952_cast_fp16_1 = split(axis = var_2952_axis_0, split_sizes = var_2952_split_sizes_0, x = normed_157_cast_fp16)[name = string("op_2952_cast_fp16")]; |
| tensor<fp16, [1536]> layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318149440)))]; |
| tensor<fp16, [1, 1, 1536]> h_39_cast_fp16 = mul(x = var_2952_cast_fp16_0, y = layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_39_cast_fp16")]; |
| tensor<int32, [3]> var_2963 = const()[name = string("op_2963"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_157_axes_0 = const()[name = string("input_157_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_2964 = transpose(perm = var_2963, x = h_39_cast_fp16)[name = string("transpose_36")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_157 = expand_dims(axes = input_157_axes_0, x = var_2964)[name = string("input_157")]; |
| string gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gate_25_groups_0 = const()[name = string("gate_25_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, strides = gate_25_strides_0, weight = layers_6_mlp_gate_proj_weight_palettized, x = input_157)[name = string("gate_25")]; |
| string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 up_13_groups_0 = const()[name = string("up_13_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = layers_6_mlp_up_proj_weight_palettized, x = input_157)[name = string("up_13")]; |
| string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")]; |
| tensor<fp16, [1, 12288, 1, 1]> input_159 = mul(x = gate_27, y = up_13)[name = string("input_159")]; |
| string mlp_out_13_pad_type_0 = const()[name = string("mlp_out_13_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> mlp_out_13_strides_0 = const()[name = string("mlp_out_13_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> mlp_out_13_pad_0 = const()[name = string("mlp_out_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> mlp_out_13_dilations_0 = const()[name = string("mlp_out_13_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 mlp_out_13_groups_0 = const()[name = string("mlp_out_13_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 1536, 1, 1]> mlp_out_13 = conv(dilations = mlp_out_13_dilations_0, groups = mlp_out_13_groups_0, pad = mlp_out_13_pad_0, pad_type = mlp_out_13_pad_type_0, strides = mlp_out_13_strides_0, weight = layers_6_mlp_down_proj_weight_palettized, x = input_159)[name = string("mlp_out_13")]; |
| tensor<int32, [1]> var_3004_axes_0 = const()[name = string("op_3004_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_3004 = squeeze(axes = var_3004_axes_0, x = mlp_out_13)[name = string("op_3004")]; |
| tensor<int32, [3]> var_3008 = const()[name = string("op_3008"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_3014 = const()[name = string("op_3014"), val = int32(-1)]; |
| fp16 const_53_promoted = const()[name = string("const_53_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_107 = transpose(perm = var_3008, x = var_3004)[name = string("transpose_35")]; |
| tensor<fp16, [1, 1, 1536]> var_3016 = mul(x = x_107, y = const_53_promoted)[name = string("op_3016")]; |
| bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_161 = concat(axis = var_3014, interleave = input_161_interleave_0, values = (x_107, var_3016))[name = string("input_161")]; |
| tensor<int32, [1]> normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_3011_to_fp16 = const()[name = string("op_3011_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_3011_to_fp16, x = input_161)[name = string("normed_161_cast_fp16")]; |
| tensor<int32, [2]> var_3021_split_sizes_0 = const()[name = string("op_3021_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_3021_axis_0 = const()[name = string("op_3021_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_3021_0, tensor<fp16, [1, 1, 1536]> var_3021_1 = split(axis = var_3021_axis_0, split_sizes = var_3021_split_sizes_0, x = normed_161_cast_fp16)[name = string("op_3021")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_63 = mul(x = var_3021_0, y = layers_6_post_feedforward_layernorm_weight)[name = string("hidden_states_63")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_65_cast_fp16 = add(x = x_105_cast_fp16, y = hidden_states_63)[name = string("hidden_states_65_cast_fp16")]; |
| tensor<int32, [3]> per_layer_slice_13_begin_0 = const()[name = string("per_layer_slice_13_begin_0"), val = tensor<int32, [3]>([0, 0, 7936])]; |
| tensor<int32, [3]> per_layer_slice_13_end_0 = const()[name = string("per_layer_slice_13_end_0"), val = tensor<int32, [3]>([1, 1, 8192])]; |
| tensor<bool, [3]> per_layer_slice_13_end_mask_0 = const()[name = string("per_layer_slice_13_end_mask_0"), val = tensor<bool, [3]>([true, true, false])]; |
| tensor<fp16, [1, 1, 256]> per_layer_slice_13_cast_fp16 = slice_by_index(begin = per_layer_slice_13_begin_0, end = per_layer_slice_13_end_0, end_mask = per_layer_slice_13_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_13_cast_fp16")]; |
| tensor<int32, [3]> var_3049 = const()[name = string("op_3049"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_163_axes_0 = const()[name = string("input_163_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_3050 = transpose(perm = var_3049, x = hidden_states_65_cast_fp16)[name = string("transpose_34")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_163 = expand_dims(axes = input_163_axes_0, x = var_3050)[name = string("input_163")]; |
| string gated_37_pad_type_0 = const()[name = string("gated_37_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_37_strides_0 = const()[name = string("gated_37_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_37_pad_0 = const()[name = string("gated_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_37_dilations_0 = const()[name = string("gated_37_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_37_groups_0 = const()[name = string("gated_37_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 256, 1, 1]> gated_37 = conv(dilations = gated_37_dilations_0, groups = gated_37_groups_0, pad = gated_37_pad_0, pad_type = gated_37_pad_type_0, strides = gated_37_strides_0, weight = layers_6_per_layer_input_gate_weight_palettized, x = input_163)[name = string("gated_37")]; |
| string gated_39_mode_0 = const()[name = string("gated_39_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 256, 1, 1]> gated_39 = gelu(mode = gated_39_mode_0, x = gated_37)[name = string("gated_39")]; |
| tensor<int32, [3]> var_3069 = const()[name = string("op_3069"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> per_layer_slice_conv_13_axes_0 = const()[name = string("per_layer_slice_conv_13_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 256, 1]> var_3070_cast_fp16 = transpose(perm = var_3069, x = per_layer_slice_13_cast_fp16)[name = string("transpose_33")]; |
| tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_13_cast_fp16 = expand_dims(axes = per_layer_slice_conv_13_axes_0, x = var_3070_cast_fp16)[name = string("per_layer_slice_conv_13_cast_fp16")]; |
| tensor<fp16, [1, 256, 1, 1]> input_165_cast_fp16 = mul(x = gated_39, y = per_layer_slice_conv_13_cast_fp16)[name = string("input_165_cast_fp16")]; |
| string gated_41_pad_type_0 = const()[name = string("gated_41_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_41_strides_0 = const()[name = string("gated_41_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_41_pad_0 = const()[name = string("gated_41_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_41_dilations_0 = const()[name = string("gated_41_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_41_groups_0 = const()[name = string("gated_41_groups_0"), val = int32(1)]; |
| tensor<fp16, [1536, 256, 1, 1]> layers_6_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318152576))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318349248))))[name = string("layers_6_per_layer_projection_weight_promoted_to_fp16_palettized")]; |
| tensor<fp16, [1, 1536, 1, 1]> gated_41_cast_fp16 = conv(dilations = gated_41_dilations_0, groups = gated_41_groups_0, pad = gated_41_pad_0, pad_type = gated_41_pad_type_0, strides = gated_41_strides_0, weight = layers_6_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_165_cast_fp16)[name = string("gated_41_cast_fp16")]; |
| tensor<int32, [1]> var_3086_axes_0 = const()[name = string("op_3086_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_3086_cast_fp16 = squeeze(axes = var_3086_axes_0, x = gated_41_cast_fp16)[name = string("op_3086_cast_fp16")]; |
| tensor<int32, [3]> var_3090 = const()[name = string("op_3090"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_3096 = const()[name = string("op_3096"), val = int32(-1)]; |
| fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_109_cast_fp16 = transpose(perm = var_3090, x = var_3086_cast_fp16)[name = string("transpose_32")]; |
| tensor<fp16, [1, 1, 1536]> var_3098_cast_fp16 = mul(x = x_109_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_3098_cast_fp16")]; |
| bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_167_cast_fp16 = concat(axis = var_3096, interleave = input_167_interleave_0, values = (x_109_cast_fp16, var_3098_cast_fp16))[name = string("input_167_cast_fp16")]; |
| tensor<int32, [1]> normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_3093_to_fp16 = const()[name = string("op_3093_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_3093_to_fp16, x = input_167_cast_fp16)[name = string("normed_165_cast_fp16")]; |
| tensor<int32, [2]> var_3103_split_sizes_0 = const()[name = string("op_3103_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_3103_axis_0 = const()[name = string("op_3103_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_3103_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3103_cast_fp16_1 = split(axis = var_3103_axis_0, split_sizes = var_3103_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_3103_cast_fp16")]; |
| tensor<fp16, [1536]> layers_6_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318350848)))]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_69_cast_fp16 = mul(x = var_3103_cast_fp16_0, y = layers_6_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_69_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_71_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; |
| tensor<fp16, [1]> const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.a8p-1])]; |
| tensor<fp16, [1, 1, 1536]> x_111_cast_fp16 = mul(x = hidden_states_71_cast_fp16, y = const_55_promoted_to_fp16)[name = string("x_111_cast_fp16")]; |
| int32 var_3118 = const()[name = string("op_3118"), val = int32(-1)]; |
| fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_3120_cast_fp16 = mul(x = x_111_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_3120_cast_fp16")]; |
| bool input_169_interleave_0 = const()[name = string("input_169_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_169_cast_fp16 = concat(axis = var_3118, interleave = input_169_interleave_0, values = (x_111_cast_fp16, var_3120_cast_fp16))[name = string("input_169_cast_fp16")]; |
| tensor<int32, [1]> normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_3115_to_fp16 = const()[name = string("op_3115_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_3115_to_fp16, x = input_169_cast_fp16)[name = string("normed_169_cast_fp16")]; |
| tensor<int32, [2]> var_3125_split_sizes_0 = const()[name = string("op_3125_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_3125_axis_0 = const()[name = string("op_3125_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_3125_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3125_cast_fp16_1 = split(axis = var_3125_axis_0, split_sizes = var_3125_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_3125_cast_fp16")]; |
| tensor<fp16, [1536]> layers_7_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318353984)))]; |
| tensor<fp16, [1, 1, 1536]> h_43_cast_fp16 = mul(x = var_3125_cast_fp16_0, y = layers_7_input_layernorm_weight_promoted_to_fp16)[name = string("h_43_cast_fp16")]; |
| tensor<int32, [3]> var_3131 = const()[name = string("op_3131"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> var_3134_axes_0 = const()[name = string("op_3134_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_3132_cast_fp16 = transpose(perm = var_3131, x = h_43_cast_fp16)[name = string("transpose_31")]; |
| tensor<fp16, [1, 1536, 1, 1]> var_3134_cast_fp16 = expand_dims(axes = var_3134_axes_0, x = var_3132_cast_fp16)[name = string("op_3134_cast_fp16")]; |
| string var_3150_pad_type_0 = const()[name = string("op_3150_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> var_3150_strides_0 = const()[name = string("op_3150_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> var_3150_pad_0 = const()[name = string("op_3150_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> var_3150_dilations_0 = const()[name = string("op_3150_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 var_3150_groups_0 = const()[name = string("op_3150_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 2048, 1, 1]> var_3150 = conv(dilations = var_3150_dilations_0, groups = var_3150_groups_0, pad = var_3150_pad_0, pad_type = var_3150_pad_type_0, strides = var_3150_strides_0, weight = layers_7_self_attn_q_proj_weight_palettized, x = var_3134_cast_fp16)[name = string("op_3150")]; |
| tensor<int32, [4]> var_3155 = const()[name = string("op_3155"), val = tensor<int32, [4]>([1, 8, 256, 1])]; |
| tensor<fp16, [1, 8, 256, 1]> var_3156 = reshape(shape = var_3155, x = var_3150)[name = string("op_3156")]; |
| tensor<int32, [4]> var_3161 = const()[name = string("op_3161"), val = tensor<int32, [4]>([0, 1, 3, 2])]; |
| tensor<int32, [3]> var_3171 = const()[name = string("op_3171"), val = tensor<int32, [3]>([1, 8, 256])]; |
| tensor<fp16, [1, 8, 1, 256]> var_3162 = transpose(perm = var_3161, x = var_3156)[name = string("transpose_30")]; |
| tensor<fp16, [1, 8, 256]> x_113 = reshape(shape = var_3171, x = var_3162)[name = string("x_113")]; |
| int32 var_3177 = const()[name = string("op_3177"), val = int32(-1)]; |
| fp16 const_57_promoted = const()[name = string("const_57_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 256]> var_3179 = mul(x = x_113, y = const_57_promoted)[name = string("op_3179")]; |
| bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 512]> input_173 = concat(axis = var_3177, interleave = input_173_interleave_0, values = (x_113, var_3179))[name = string("input_173")]; |
| tensor<int32, [1]> normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_3174_to_fp16 = const()[name = string("op_3174_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 8, 512]> normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_3174_to_fp16, x = input_173)[name = string("normed_173_cast_fp16")]; |
| tensor<int32, [2]> var_3184_split_sizes_0 = const()[name = string("op_3184_split_sizes_0"), val = tensor<int32, [2]>([256, 256])]; |
| int32 var_3184_axis_0 = const()[name = string("op_3184_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 256]> var_3184_0, tensor<fp16, [1, 8, 256]> var_3184_1 = split(axis = var_3184_axis_0, split_sizes = var_3184_split_sizes_0, x = normed_173_cast_fp16)[name = string("op_3184")]; |
| tensor<fp16, [1, 8, 256]> var_3186 = mul(x = var_3184_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_3186")]; |
| tensor<int32, [4]> var_3191 = const()[name = string("op_3191"), val = tensor<int32, [4]>([1, 8, 1, 256])]; |
| tensor<fp16, [1, 8, 1, 256]> q_45 = reshape(shape = var_3191, x = var_3186)[name = string("q_45")]; |
| tensor<fp16, [1, 8, 1, 256]> var_3193_cast_fp16 = mul(x = q_45, y = cos_s)[name = string("op_3193_cast_fp16")]; |
| tensor<int32, [2]> var_3194_split_sizes_0 = const()[name = string("op_3194_split_sizes_0"), val = tensor<int32, [2]>([128, 128])]; |
| int32 var_3194_axis_0 = const()[name = string("op_3194_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 1, 128]> var_3194_0, tensor<fp16, [1, 8, 1, 128]> var_3194_1 = split(axis = var_3194_axis_0, split_sizes = var_3194_split_sizes_0, x = q_45)[name = string("op_3194")]; |
| fp16 const_58_promoted = const()[name = string("const_58_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 1, 128]> var_3196 = mul(x = var_3194_1, y = const_58_promoted)[name = string("op_3196")]; |
| int32 var_3198 = const()[name = string("op_3198"), val = int32(-1)]; |
| bool var_3199_interleave_0 = const()[name = string("op_3199_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 256]> var_3199 = concat(axis = var_3198, interleave = var_3199_interleave_0, values = (var_3196, var_3194_0))[name = string("op_3199")]; |
| tensor<fp16, [1, 8, 1, 256]> var_3200_cast_fp16 = mul(x = var_3199, y = sin_s)[name = string("op_3200_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 256]> q_47_cast_fp16 = add(x = var_3193_cast_fp16, y = var_3200_cast_fp16)[name = string("q_47_cast_fp16")]; |
| bool attn_weights_29_transpose_x_0 = const()[name = string("attn_weights_29_transpose_x_0"), val = bool(false)]; |
| bool attn_weights_29_transpose_y_0 = const()[name = string("attn_weights_29_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 512]> attn_weights_29_cast_fp16 = matmul(transpose_x = attn_weights_29_transpose_x_0, transpose_y = attn_weights_29_transpose_y_0, x = q_47_cast_fp16, y = transpose_40_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 512]> x_115_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask_sliding)[name = string("x_115_cast_fp16")]; |
| tensor<int32, [1]> reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> reduce_max_7 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_115_cast_fp16)[name = string("reduce_max_7")]; |
| tensor<fp16, [1, 8, 1, 512]> var_3232 = sub(x = x_115_cast_fp16, y = reduce_max_7)[name = string("op_3232")]; |
| tensor<fp16, [1, 8, 1, 512]> var_3238 = exp(x = var_3232)[name = string("op_3238")]; |
| tensor<int32, [1]> var_3248_axes_0 = const()[name = string("op_3248_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool var_3248_keep_dims_0 = const()[name = string("op_3248_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> var_3248 = reduce_sum(axes = var_3248_axes_0, keep_dims = var_3248_keep_dims_0, x = var_3238)[name = string("op_3248")]; |
| tensor<fp16, [1, 8, 1, 512]> var_3254_cast_fp16 = real_div(x = var_3238, y = var_3248)[name = string("op_3254_cast_fp16")]; |
| bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; |
| bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 256]> attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = var_3254_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_43_cast_fp16")]; |
| tensor<int32, [4]> var_3265 = const()[name = string("op_3265"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_3272 = const()[name = string("op_3272"), val = tensor<int32, [3]>([1, 1, -1])]; |
| tensor<fp16, [1, 1, 8, 256]> var_3266_cast_fp16 = transpose(perm = var_3265, x = attn_output_43_cast_fp16)[name = string("transpose_29")]; |
| tensor<fp16, [1, 1, 2048]> attn_output_45_cast_fp16 = reshape(shape = var_3272, x = var_3266_cast_fp16)[name = string("attn_output_45_cast_fp16")]; |
| tensor<int32, [3]> var_3277 = const()[name = string("op_3277"), val = tensor<int32, [3]>([0, 2, 1])]; |
| string var_3293_pad_type_0 = const()[name = string("op_3293_pad_type_0"), val = string("valid")]; |
| int32 var_3293_groups_0 = const()[name = string("op_3293_groups_0"), val = int32(1)]; |
| tensor<int32, [1]> var_3293_strides_0 = const()[name = string("op_3293_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> var_3293_pad_0 = const()[name = string("op_3293_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> var_3293_dilations_0 = const()[name = string("op_3293_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<fp16, [1536, 2048, 1]> squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318357120))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319930048))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; |
| tensor<fp16, [1, 2048, 1]> var_3278_cast_fp16 = transpose(perm = var_3277, x = attn_output_45_cast_fp16)[name = string("transpose_28")]; |
| tensor<fp16, [1, 1536, 1]> var_3293_cast_fp16 = conv(dilations = var_3293_dilations_0, groups = var_3293_groups_0, pad = var_3293_pad_0, pad_type = var_3293_pad_type_0, strides = var_3293_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_3278_cast_fp16)[name = string("op_3293_cast_fp16")]; |
| tensor<int32, [3]> var_3297 = const()[name = string("op_3297"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_3303 = const()[name = string("op_3303"), val = int32(-1)]; |
| fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_119_cast_fp16 = transpose(perm = var_3297, x = var_3293_cast_fp16)[name = string("transpose_27")]; |
| tensor<fp16, [1, 1, 1536]> var_3305_cast_fp16 = mul(x = x_119_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_3305_cast_fp16")]; |
| bool input_177_interleave_0 = const()[name = string("input_177_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_177_cast_fp16 = concat(axis = var_3303, interleave = input_177_interleave_0, values = (x_119_cast_fp16, var_3305_cast_fp16))[name = string("input_177_cast_fp16")]; |
| tensor<int32, [1]> normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_3300_to_fp16 = const()[name = string("op_3300_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_3300_to_fp16, x = input_177_cast_fp16)[name = string("normed_177_cast_fp16")]; |
| tensor<int32, [2]> var_3310_split_sizes_0 = const()[name = string("op_3310_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_3310_axis_0 = const()[name = string("op_3310_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_3310_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3310_cast_fp16_1 = split(axis = var_3310_axis_0, split_sizes = var_3310_split_sizes_0, x = normed_177_cast_fp16)[name = string("op_3310_cast_fp16")]; |
| tensor<fp16, [1536]> layers_7_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319931648)))]; |
| tensor<fp16, [1, 1, 1536]> attn_output_47_cast_fp16 = mul(x = var_3310_cast_fp16_0, y = layers_7_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_47_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> x_121_cast_fp16 = add(x = x_111_cast_fp16, y = attn_output_47_cast_fp16)[name = string("x_121_cast_fp16")]; |
| int32 var_3319 = const()[name = string("op_3319"), val = int32(-1)]; |
| fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_3321_cast_fp16 = mul(x = x_121_cast_fp16, y = const_60_promoted_to_fp16)[name = string("op_3321_cast_fp16")]; |
| bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_179_cast_fp16 = concat(axis = var_3319, interleave = input_179_interleave_0, values = (x_121_cast_fp16, var_3321_cast_fp16))[name = string("input_179_cast_fp16")]; |
| tensor<int32, [1]> normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_3316_to_fp16 = const()[name = string("op_3316_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_3316_to_fp16, x = input_179_cast_fp16)[name = string("normed_181_cast_fp16")]; |
| tensor<int32, [2]> var_3326_split_sizes_0 = const()[name = string("op_3326_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_3326_axis_0 = const()[name = string("op_3326_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_3326_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3326_cast_fp16_1 = split(axis = var_3326_axis_0, split_sizes = var_3326_split_sizes_0, x = normed_181_cast_fp16)[name = string("op_3326_cast_fp16")]; |
| tensor<fp16, [1536]> layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319934784)))]; |
| tensor<fp16, [1, 1, 1536]> h_45_cast_fp16 = mul(x = var_3326_cast_fp16_0, y = layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_45_cast_fp16")]; |
| tensor<int32, [3]> var_3337 = const()[name = string("op_3337"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_181_axes_0 = const()[name = string("input_181_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_3338 = transpose(perm = var_3337, x = h_45_cast_fp16)[name = string("transpose_26")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_181 = expand_dims(axes = input_181_axes_0, x = var_3338)[name = string("input_181")]; |
| string gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_29 = conv(dilations = gate_29_dilations_0, groups = gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = layers_7_mlp_gate_proj_weight_palettized, x = input_181)[name = string("gate_29")]; |
| string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> up_15 = conv(dilations = up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = layers_7_mlp_up_proj_weight_palettized, x = input_181)[name = string("up_15")]; |
| string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")]; |
| tensor<fp16, [1, 12288, 1, 1]> input_183 = mul(x = gate_31, y = up_15)[name = string("input_183")]; |
| string mlp_out_15_pad_type_0 = const()[name = string("mlp_out_15_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> mlp_out_15_strides_0 = const()[name = string("mlp_out_15_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> mlp_out_15_pad_0 = const()[name = string("mlp_out_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> mlp_out_15_dilations_0 = const()[name = string("mlp_out_15_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 mlp_out_15_groups_0 = const()[name = string("mlp_out_15_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 1536, 1, 1]> mlp_out_15 = conv(dilations = mlp_out_15_dilations_0, groups = mlp_out_15_groups_0, pad = mlp_out_15_pad_0, pad_type = mlp_out_15_pad_type_0, strides = mlp_out_15_strides_0, weight = layers_7_mlp_down_proj_weight_palettized, x = input_183)[name = string("mlp_out_15")]; |
| tensor<int32, [1]> var_3378_axes_0 = const()[name = string("op_3378_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_3378 = squeeze(axes = var_3378_axes_0, x = mlp_out_15)[name = string("op_3378")]; |
| tensor<int32, [3]> var_3382 = const()[name = string("op_3382"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_3388 = const()[name = string("op_3388"), val = int32(-1)]; |
| fp16 const_61_promoted = const()[name = string("const_61_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_123 = transpose(perm = var_3382, x = var_3378)[name = string("transpose_25")]; |
| tensor<fp16, [1, 1, 1536]> var_3390 = mul(x = x_123, y = const_61_promoted)[name = string("op_3390")]; |
| bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_185 = concat(axis = var_3388, interleave = input_185_interleave_0, values = (x_123, var_3390))[name = string("input_185")]; |
| tensor<int32, [1]> normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_3385_to_fp16 = const()[name = string("op_3385_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_3385_to_fp16, x = input_185)[name = string("normed_185_cast_fp16")]; |
| tensor<int32, [2]> var_3395_split_sizes_0 = const()[name = string("op_3395_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_3395_axis_0 = const()[name = string("op_3395_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_3395_0, tensor<fp16, [1, 1, 1536]> var_3395_1 = split(axis = var_3395_axis_0, split_sizes = var_3395_split_sizes_0, x = normed_185_cast_fp16)[name = string("op_3395")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_73 = mul(x = var_3395_0, y = layers_7_post_feedforward_layernorm_weight)[name = string("hidden_states_73")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_75_cast_fp16 = add(x = x_121_cast_fp16, y = hidden_states_73)[name = string("hidden_states_75_cast_fp16")]; |
| tensor<int32, [3]> per_layer_slice_15_begin_0 = const()[name = string("per_layer_slice_15_begin_0"), val = tensor<int32, [3]>([0, 0, 8192])]; |
| tensor<int32, [3]> per_layer_slice_15_end_0 = const()[name = string("per_layer_slice_15_end_0"), val = tensor<int32, [3]>([1, 1, 8448])]; |
| tensor<bool, [3]> per_layer_slice_15_end_mask_0 = const()[name = string("per_layer_slice_15_end_mask_0"), val = tensor<bool, [3]>([true, true, false])]; |
| tensor<fp16, [1, 1, 256]> per_layer_slice_15_cast_fp16 = slice_by_index(begin = per_layer_slice_15_begin_0, end = per_layer_slice_15_end_0, end_mask = per_layer_slice_15_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_15_cast_fp16")]; |
| tensor<int32, [3]> var_3423 = const()[name = string("op_3423"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_187_axes_0 = const()[name = string("input_187_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_3424 = transpose(perm = var_3423, x = hidden_states_75_cast_fp16)[name = string("transpose_24")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_187 = expand_dims(axes = input_187_axes_0, x = var_3424)[name = string("input_187")]; |
| string gated_43_pad_type_0 = const()[name = string("gated_43_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_43_strides_0 = const()[name = string("gated_43_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_43_pad_0 = const()[name = string("gated_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_43_dilations_0 = const()[name = string("gated_43_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_43_groups_0 = const()[name = string("gated_43_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 256, 1, 1]> gated_43 = conv(dilations = gated_43_dilations_0, groups = gated_43_groups_0, pad = gated_43_pad_0, pad_type = gated_43_pad_type_0, strides = gated_43_strides_0, weight = layers_7_per_layer_input_gate_weight_palettized, x = input_187)[name = string("gated_43")]; |
| string gated_45_mode_0 = const()[name = string("gated_45_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 256, 1, 1]> gated_45 = gelu(mode = gated_45_mode_0, x = gated_43)[name = string("gated_45")]; |
| tensor<int32, [3]> var_3443 = const()[name = string("op_3443"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> per_layer_slice_conv_15_axes_0 = const()[name = string("per_layer_slice_conv_15_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 256, 1]> var_3444_cast_fp16 = transpose(perm = var_3443, x = per_layer_slice_15_cast_fp16)[name = string("transpose_23")]; |
| tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_15_cast_fp16 = expand_dims(axes = per_layer_slice_conv_15_axes_0, x = var_3444_cast_fp16)[name = string("per_layer_slice_conv_15_cast_fp16")]; |
| tensor<fp16, [1, 256, 1, 1]> input_189_cast_fp16 = mul(x = gated_45, y = per_layer_slice_conv_15_cast_fp16)[name = string("input_189_cast_fp16")]; |
| string gated_47_pad_type_0 = const()[name = string("gated_47_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_47_strides_0 = const()[name = string("gated_47_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_47_pad_0 = const()[name = string("gated_47_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_47_dilations_0 = const()[name = string("gated_47_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_47_groups_0 = const()[name = string("gated_47_groups_0"), val = int32(1)]; |
| tensor<fp16, [1536, 256, 1, 1]> layers_7_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319937920))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320134592))))[name = string("layers_7_per_layer_projection_weight_promoted_to_fp16_palettized")]; |
| tensor<fp16, [1, 1536, 1, 1]> gated_47_cast_fp16 = conv(dilations = gated_47_dilations_0, groups = gated_47_groups_0, pad = gated_47_pad_0, pad_type = gated_47_pad_type_0, strides = gated_47_strides_0, weight = layers_7_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_189_cast_fp16)[name = string("gated_47_cast_fp16")]; |
| tensor<int32, [1]> var_3460_axes_0 = const()[name = string("op_3460_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_3460_cast_fp16 = squeeze(axes = var_3460_axes_0, x = gated_47_cast_fp16)[name = string("op_3460_cast_fp16")]; |
| tensor<int32, [3]> var_3464 = const()[name = string("op_3464"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_3470 = const()[name = string("op_3470"), val = int32(-1)]; |
| fp16 const_62_promoted_to_fp16 = const()[name = string("const_62_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_125_cast_fp16 = transpose(perm = var_3464, x = var_3460_cast_fp16)[name = string("transpose_22")]; |
| tensor<fp16, [1, 1, 1536]> var_3472_cast_fp16 = mul(x = x_125_cast_fp16, y = const_62_promoted_to_fp16)[name = string("op_3472_cast_fp16")]; |
| bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_191_cast_fp16 = concat(axis = var_3470, interleave = input_191_interleave_0, values = (x_125_cast_fp16, var_3472_cast_fp16))[name = string("input_191_cast_fp16")]; |
| tensor<int32, [1]> normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_3467_to_fp16 = const()[name = string("op_3467_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_3467_to_fp16, x = input_191_cast_fp16)[name = string("normed_189_cast_fp16")]; |
| tensor<int32, [2]> var_3477_split_sizes_0 = const()[name = string("op_3477_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_3477_axis_0 = const()[name = string("op_3477_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_3477_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3477_cast_fp16_1 = split(axis = var_3477_axis_0, split_sizes = var_3477_split_sizes_0, x = normed_189_cast_fp16)[name = string("op_3477_cast_fp16")]; |
| tensor<fp16, [1536]> layers_7_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320136192)))]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_79_cast_fp16 = mul(x = var_3477_cast_fp16_0, y = layers_7_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_79_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; |
| tensor<fp16, [1]> const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.bep-1])]; |
| tensor<fp16, [1, 1, 1536]> x_127_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_63_promoted_to_fp16)[name = string("x_127_cast_fp16")]; |
| int32 var_3492 = const()[name = string("op_3492"), val = int32(-1)]; |
| fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_3494_cast_fp16 = mul(x = x_127_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_3494_cast_fp16")]; |
| bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_193_cast_fp16 = concat(axis = var_3492, interleave = input_193_interleave_0, values = (x_127_cast_fp16, var_3494_cast_fp16))[name = string("input_193_cast_fp16")]; |
| tensor<int32, [1]> normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_3489_to_fp16 = const()[name = string("op_3489_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_3489_to_fp16, x = input_193_cast_fp16)[name = string("normed_193_cast_fp16")]; |
| tensor<int32, [2]> var_3499_split_sizes_0 = const()[name = string("op_3499_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_3499_axis_0 = const()[name = string("op_3499_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_3499_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3499_cast_fp16_1 = split(axis = var_3499_axis_0, split_sizes = var_3499_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_3499_cast_fp16")]; |
| tensor<fp16, [1536]> layers_8_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320139328)))]; |
| tensor<fp16, [1, 1, 1536]> h_49_cast_fp16 = mul(x = var_3499_cast_fp16_0, y = layers_8_input_layernorm_weight_promoted_to_fp16)[name = string("h_49_cast_fp16")]; |
| tensor<int32, [3]> var_3505 = const()[name = string("op_3505"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> var_3508_axes_0 = const()[name = string("op_3508_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_3506_cast_fp16 = transpose(perm = var_3505, x = h_49_cast_fp16)[name = string("transpose_21")]; |
| tensor<fp16, [1, 1536, 1, 1]> var_3508_cast_fp16 = expand_dims(axes = var_3508_axes_0, x = var_3506_cast_fp16)[name = string("op_3508_cast_fp16")]; |
| string var_3524_pad_type_0 = const()[name = string("op_3524_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> var_3524_strides_0 = const()[name = string("op_3524_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> var_3524_pad_0 = const()[name = string("op_3524_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> var_3524_dilations_0 = const()[name = string("op_3524_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 var_3524_groups_0 = const()[name = string("op_3524_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 2048, 1, 1]> var_3524 = conv(dilations = var_3524_dilations_0, groups = var_3524_groups_0, pad = var_3524_pad_0, pad_type = var_3524_pad_type_0, strides = var_3524_strides_0, weight = layers_8_self_attn_q_proj_weight_palettized, x = var_3508_cast_fp16)[name = string("op_3524")]; |
| tensor<int32, [4]> var_3529 = const()[name = string("op_3529"), val = tensor<int32, [4]>([1, 8, 256, 1])]; |
| tensor<fp16, [1, 8, 256, 1]> var_3530 = reshape(shape = var_3529, x = var_3524)[name = string("op_3530")]; |
| tensor<int32, [4]> var_3535 = const()[name = string("op_3535"), val = tensor<int32, [4]>([0, 1, 3, 2])]; |
| tensor<int32, [3]> var_3545 = const()[name = string("op_3545"), val = tensor<int32, [3]>([1, 8, 256])]; |
| tensor<fp16, [1, 8, 1, 256]> var_3536 = transpose(perm = var_3535, x = var_3530)[name = string("transpose_20")]; |
| tensor<fp16, [1, 8, 256]> x_129 = reshape(shape = var_3545, x = var_3536)[name = string("x_129")]; |
| int32 var_3551 = const()[name = string("op_3551"), val = int32(-1)]; |
| fp16 const_65_promoted = const()[name = string("const_65_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 256]> var_3553 = mul(x = x_129, y = const_65_promoted)[name = string("op_3553")]; |
| bool input_197_interleave_0 = const()[name = string("input_197_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 512]> input_197 = concat(axis = var_3551, interleave = input_197_interleave_0, values = (x_129, var_3553))[name = string("input_197")]; |
| tensor<int32, [1]> normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_3548_to_fp16 = const()[name = string("op_3548_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 8, 512]> normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_3548_to_fp16, x = input_197)[name = string("normed_197_cast_fp16")]; |
| tensor<int32, [2]> var_3558_split_sizes_0 = const()[name = string("op_3558_split_sizes_0"), val = tensor<int32, [2]>([256, 256])]; |
| int32 var_3558_axis_0 = const()[name = string("op_3558_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 256]> var_3558_0, tensor<fp16, [1, 8, 256]> var_3558_1 = split(axis = var_3558_axis_0, split_sizes = var_3558_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_3558")]; |
| tensor<fp16, [1, 8, 256]> var_3560 = mul(x = var_3558_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_3560")]; |
| tensor<int32, [4]> var_3565 = const()[name = string("op_3565"), val = tensor<int32, [4]>([1, 8, 1, 256])]; |
| tensor<fp16, [1, 8, 1, 256]> q_51 = reshape(shape = var_3565, x = var_3560)[name = string("q_51")]; |
| tensor<fp16, [1, 8, 1, 256]> var_3567_cast_fp16 = mul(x = q_51, y = cos_s)[name = string("op_3567_cast_fp16")]; |
| tensor<int32, [2]> var_3568_split_sizes_0 = const()[name = string("op_3568_split_sizes_0"), val = tensor<int32, [2]>([128, 128])]; |
| int32 var_3568_axis_0 = const()[name = string("op_3568_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 1, 128]> var_3568_0, tensor<fp16, [1, 8, 1, 128]> var_3568_1 = split(axis = var_3568_axis_0, split_sizes = var_3568_split_sizes_0, x = q_51)[name = string("op_3568")]; |
| fp16 const_66_promoted = const()[name = string("const_66_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 1, 128]> var_3570 = mul(x = var_3568_1, y = const_66_promoted)[name = string("op_3570")]; |
| int32 var_3572 = const()[name = string("op_3572"), val = int32(-1)]; |
| bool var_3573_interleave_0 = const()[name = string("op_3573_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 256]> var_3573 = concat(axis = var_3572, interleave = var_3573_interleave_0, values = (var_3570, var_3568_0))[name = string("op_3573")]; |
| tensor<fp16, [1, 8, 1, 256]> var_3574_cast_fp16 = mul(x = var_3573, y = sin_s)[name = string("op_3574_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 256]> q_53_cast_fp16 = add(x = var_3567_cast_fp16, y = var_3574_cast_fp16)[name = string("q_53_cast_fp16")]; |
| bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; |
| bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 512]> attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = q_53_cast_fp16, y = transpose_40_cast_fp16)[name = string("attn_weights_33_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 512]> x_131_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask_sliding)[name = string("x_131_cast_fp16")]; |
| tensor<int32, [1]> reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> reduce_max_8 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_131_cast_fp16)[name = string("reduce_max_8")]; |
| tensor<fp16, [1, 8, 1, 512]> var_3606 = sub(x = x_131_cast_fp16, y = reduce_max_8)[name = string("op_3606")]; |
| tensor<fp16, [1, 8, 1, 512]> var_3612 = exp(x = var_3606)[name = string("op_3612")]; |
| tensor<int32, [1]> var_3622_axes_0 = const()[name = string("op_3622_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool var_3622_keep_dims_0 = const()[name = string("op_3622_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> var_3622 = reduce_sum(axes = var_3622_axes_0, keep_dims = var_3622_keep_dims_0, x = var_3612)[name = string("op_3622")]; |
| tensor<fp16, [1, 8, 1, 512]> var_3628_cast_fp16 = real_div(x = var_3612, y = var_3622)[name = string("op_3628_cast_fp16")]; |
| bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)]; |
| bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 256]> attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = var_3628_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_49_cast_fp16")]; |
| tensor<int32, [4]> var_3639 = const()[name = string("op_3639"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_3646 = const()[name = string("op_3646"), val = tensor<int32, [3]>([1, 1, -1])]; |
| tensor<fp16, [1, 1, 8, 256]> var_3640_cast_fp16 = transpose(perm = var_3639, x = attn_output_49_cast_fp16)[name = string("transpose_19")]; |
| tensor<fp16, [1, 1, 2048]> attn_output_51_cast_fp16 = reshape(shape = var_3646, x = var_3640_cast_fp16)[name = string("attn_output_51_cast_fp16")]; |
| tensor<int32, [3]> var_3651 = const()[name = string("op_3651"), val = tensor<int32, [3]>([0, 2, 1])]; |
| string var_3667_pad_type_0 = const()[name = string("op_3667_pad_type_0"), val = string("valid")]; |
| int32 var_3667_groups_0 = const()[name = string("op_3667_groups_0"), val = int32(1)]; |
| tensor<int32, [1]> var_3667_strides_0 = const()[name = string("op_3667_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> var_3667_pad_0 = const()[name = string("op_3667_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> var_3667_dilations_0 = const()[name = string("op_3667_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<fp16, [1536, 2048, 1]> squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320142464))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321715392))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; |
| tensor<fp16, [1, 2048, 1]> var_3652_cast_fp16 = transpose(perm = var_3651, x = attn_output_51_cast_fp16)[name = string("transpose_18")]; |
| tensor<fp16, [1, 1536, 1]> var_3667_cast_fp16 = conv(dilations = var_3667_dilations_0, groups = var_3667_groups_0, pad = var_3667_pad_0, pad_type = var_3667_pad_type_0, strides = var_3667_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_3652_cast_fp16)[name = string("op_3667_cast_fp16")]; |
| tensor<int32, [3]> var_3671 = const()[name = string("op_3671"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_3677 = const()[name = string("op_3677"), val = int32(-1)]; |
| fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_135_cast_fp16 = transpose(perm = var_3671, x = var_3667_cast_fp16)[name = string("transpose_17")]; |
| tensor<fp16, [1, 1, 1536]> var_3679_cast_fp16 = mul(x = x_135_cast_fp16, y = const_67_promoted_to_fp16)[name = string("op_3679_cast_fp16")]; |
| bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_201_cast_fp16 = concat(axis = var_3677, interleave = input_201_interleave_0, values = (x_135_cast_fp16, var_3679_cast_fp16))[name = string("input_201_cast_fp16")]; |
| tensor<int32, [1]> normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_3674_to_fp16 = const()[name = string("op_3674_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_3674_to_fp16, x = input_201_cast_fp16)[name = string("normed_201_cast_fp16")]; |
| tensor<int32, [2]> var_3684_split_sizes_0 = const()[name = string("op_3684_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_3684_axis_0 = const()[name = string("op_3684_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_3684_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3684_cast_fp16_1 = split(axis = var_3684_axis_0, split_sizes = var_3684_split_sizes_0, x = normed_201_cast_fp16)[name = string("op_3684_cast_fp16")]; |
| tensor<fp16, [1536]> layers_8_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321716992)))]; |
| tensor<fp16, [1, 1, 1536]> attn_output_53_cast_fp16 = mul(x = var_3684_cast_fp16_0, y = layers_8_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_53_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> x_137_cast_fp16 = add(x = x_127_cast_fp16, y = attn_output_53_cast_fp16)[name = string("x_137_cast_fp16")]; |
| int32 var_3693 = const()[name = string("op_3693"), val = int32(-1)]; |
| fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_3695_cast_fp16 = mul(x = x_137_cast_fp16, y = const_68_promoted_to_fp16)[name = string("op_3695_cast_fp16")]; |
| bool input_203_interleave_0 = const()[name = string("input_203_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_203_cast_fp16 = concat(axis = var_3693, interleave = input_203_interleave_0, values = (x_137_cast_fp16, var_3695_cast_fp16))[name = string("input_203_cast_fp16")]; |
| tensor<int32, [1]> normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_3690_to_fp16 = const()[name = string("op_3690_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_3690_to_fp16, x = input_203_cast_fp16)[name = string("normed_205_cast_fp16")]; |
| tensor<int32, [2]> var_3700_split_sizes_0 = const()[name = string("op_3700_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_3700_axis_0 = const()[name = string("op_3700_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_3700_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3700_cast_fp16_1 = split(axis = var_3700_axis_0, split_sizes = var_3700_split_sizes_0, x = normed_205_cast_fp16)[name = string("op_3700_cast_fp16")]; |
| tensor<fp16, [1536]> layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321720128)))]; |
| tensor<fp16, [1, 1, 1536]> h_51_cast_fp16 = mul(x = var_3700_cast_fp16_0, y = layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_51_cast_fp16")]; |
| tensor<int32, [3]> var_3711 = const()[name = string("op_3711"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_205_axes_0 = const()[name = string("input_205_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_3712 = transpose(perm = var_3711, x = h_51_cast_fp16)[name = string("transpose_16")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_205 = expand_dims(axes = input_205_axes_0, x = var_3712)[name = string("input_205")]; |
| string gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = layers_8_mlp_gate_proj_weight_palettized, x = input_205)[name = string("gate_33")]; |
| string up_17_pad_type_0 = const()[name = string("up_17_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> up_17_strides_0 = const()[name = string("up_17_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> up_17_pad_0 = const()[name = string("up_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> up_17_dilations_0 = const()[name = string("up_17_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 up_17_groups_0 = const()[name = string("up_17_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> up_17 = conv(dilations = up_17_dilations_0, groups = up_17_groups_0, pad = up_17_pad_0, pad_type = up_17_pad_type_0, strides = up_17_strides_0, weight = layers_8_mlp_up_proj_weight_palettized, x = input_205)[name = string("up_17")]; |
| string gate_35_mode_0 = const()[name = string("gate_35_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_35 = gelu(mode = gate_35_mode_0, x = gate_33)[name = string("gate_35")]; |
| tensor<fp16, [1, 12288, 1, 1]> input_207 = mul(x = gate_35, y = up_17)[name = string("input_207")]; |
| string mlp_out_17_pad_type_0 = const()[name = string("mlp_out_17_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> mlp_out_17_strides_0 = const()[name = string("mlp_out_17_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> mlp_out_17_pad_0 = const()[name = string("mlp_out_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> mlp_out_17_dilations_0 = const()[name = string("mlp_out_17_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 mlp_out_17_groups_0 = const()[name = string("mlp_out_17_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 1536, 1, 1]> mlp_out_17 = conv(dilations = mlp_out_17_dilations_0, groups = mlp_out_17_groups_0, pad = mlp_out_17_pad_0, pad_type = mlp_out_17_pad_type_0, strides = mlp_out_17_strides_0, weight = layers_8_mlp_down_proj_weight_palettized, x = input_207)[name = string("mlp_out_17")]; |
| tensor<int32, [1]> var_3752_axes_0 = const()[name = string("op_3752_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_3752 = squeeze(axes = var_3752_axes_0, x = mlp_out_17)[name = string("op_3752")]; |
| tensor<int32, [3]> var_3756 = const()[name = string("op_3756"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_3762 = const()[name = string("op_3762"), val = int32(-1)]; |
| fp16 const_69_promoted = const()[name = string("const_69_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_139 = transpose(perm = var_3756, x = var_3752)[name = string("transpose_15")]; |
| tensor<fp16, [1, 1, 1536]> var_3764 = mul(x = x_139, y = const_69_promoted)[name = string("op_3764")]; |
| bool input_209_interleave_0 = const()[name = string("input_209_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_209 = concat(axis = var_3762, interleave = input_209_interleave_0, values = (x_139, var_3764))[name = string("input_209")]; |
| tensor<int32, [1]> normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_3759_to_fp16 = const()[name = string("op_3759_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_3759_to_fp16, x = input_209)[name = string("normed_209_cast_fp16")]; |
| tensor<int32, [2]> var_3769_split_sizes_0 = const()[name = string("op_3769_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_3769_axis_0 = const()[name = string("op_3769_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_3769_0, tensor<fp16, [1, 1, 1536]> var_3769_1 = split(axis = var_3769_axis_0, split_sizes = var_3769_split_sizes_0, x = normed_209_cast_fp16)[name = string("op_3769")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_83 = mul(x = var_3769_0, y = layers_8_post_feedforward_layernorm_weight)[name = string("hidden_states_83")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_85_cast_fp16 = add(x = x_137_cast_fp16, y = hidden_states_83)[name = string("hidden_states_85_cast_fp16")]; |
| tensor<int32, [3]> per_layer_slice_17_begin_0 = const()[name = string("per_layer_slice_17_begin_0"), val = tensor<int32, [3]>([0, 0, 8448])]; |
| tensor<int32, [3]> per_layer_slice_17_end_0 = const()[name = string("per_layer_slice_17_end_0"), val = tensor<int32, [3]>([1, 1, 8704])]; |
| tensor<bool, [3]> per_layer_slice_17_end_mask_0 = const()[name = string("per_layer_slice_17_end_mask_0"), val = tensor<bool, [3]>([true, true, false])]; |
| tensor<fp16, [1, 1, 256]> per_layer_slice_17_cast_fp16 = slice_by_index(begin = per_layer_slice_17_begin_0, end = per_layer_slice_17_end_0, end_mask = per_layer_slice_17_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_17_cast_fp16")]; |
| tensor<int32, [3]> var_3797 = const()[name = string("op_3797"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_211_axes_0 = const()[name = string("input_211_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_3798 = transpose(perm = var_3797, x = hidden_states_85_cast_fp16)[name = string("transpose_14")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_211 = expand_dims(axes = input_211_axes_0, x = var_3798)[name = string("input_211")]; |
| string gated_49_pad_type_0 = const()[name = string("gated_49_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_49_strides_0 = const()[name = string("gated_49_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_49_pad_0 = const()[name = string("gated_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_49_dilations_0 = const()[name = string("gated_49_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_49_groups_0 = const()[name = string("gated_49_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 256, 1, 1]> gated_49 = conv(dilations = gated_49_dilations_0, groups = gated_49_groups_0, pad = gated_49_pad_0, pad_type = gated_49_pad_type_0, strides = gated_49_strides_0, weight = layers_8_per_layer_input_gate_weight_palettized, x = input_211)[name = string("gated_49")]; |
| string gated_51_mode_0 = const()[name = string("gated_51_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 256, 1, 1]> gated_51 = gelu(mode = gated_51_mode_0, x = gated_49)[name = string("gated_51")]; |
| tensor<int32, [3]> var_3817 = const()[name = string("op_3817"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> per_layer_slice_conv_17_axes_0 = const()[name = string("per_layer_slice_conv_17_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 256, 1]> var_3818_cast_fp16 = transpose(perm = var_3817, x = per_layer_slice_17_cast_fp16)[name = string("transpose_13")]; |
| tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_17_cast_fp16 = expand_dims(axes = per_layer_slice_conv_17_axes_0, x = var_3818_cast_fp16)[name = string("per_layer_slice_conv_17_cast_fp16")]; |
| tensor<fp16, [1, 256, 1, 1]> input_213_cast_fp16 = mul(x = gated_51, y = per_layer_slice_conv_17_cast_fp16)[name = string("input_213_cast_fp16")]; |
| string gated_53_pad_type_0 = const()[name = string("gated_53_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_53_strides_0 = const()[name = string("gated_53_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_53_pad_0 = const()[name = string("gated_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_53_dilations_0 = const()[name = string("gated_53_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_53_groups_0 = const()[name = string("gated_53_groups_0"), val = int32(1)]; |
| tensor<fp16, [1536, 256, 1, 1]> layers_8_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321723264))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321919936))))[name = string("layers_8_per_layer_projection_weight_promoted_to_fp16_palettized")]; |
| tensor<fp16, [1, 1536, 1, 1]> gated_53_cast_fp16 = conv(dilations = gated_53_dilations_0, groups = gated_53_groups_0, pad = gated_53_pad_0, pad_type = gated_53_pad_type_0, strides = gated_53_strides_0, weight = layers_8_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_213_cast_fp16)[name = string("gated_53_cast_fp16")]; |
| tensor<int32, [1]> var_3834_axes_0 = const()[name = string("op_3834_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_3834_cast_fp16 = squeeze(axes = var_3834_axes_0, x = gated_53_cast_fp16)[name = string("op_3834_cast_fp16")]; |
| tensor<int32, [3]> var_3838 = const()[name = string("op_3838"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_3844 = const()[name = string("op_3844"), val = int32(-1)]; |
| fp16 const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_141_cast_fp16 = transpose(perm = var_3838, x = var_3834_cast_fp16)[name = string("transpose_12")]; |
| tensor<fp16, [1, 1, 1536]> var_3846_cast_fp16 = mul(x = x_141_cast_fp16, y = const_70_promoted_to_fp16)[name = string("op_3846_cast_fp16")]; |
| bool input_215_interleave_0 = const()[name = string("input_215_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_215_cast_fp16 = concat(axis = var_3844, interleave = input_215_interleave_0, values = (x_141_cast_fp16, var_3846_cast_fp16))[name = string("input_215_cast_fp16")]; |
| tensor<int32, [1]> normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_3841_to_fp16 = const()[name = string("op_3841_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_3841_to_fp16, x = input_215_cast_fp16)[name = string("normed_213_cast_fp16")]; |
| tensor<int32, [2]> var_3851_split_sizes_0 = const()[name = string("op_3851_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_3851_axis_0 = const()[name = string("op_3851_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_3851_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3851_cast_fp16_1 = split(axis = var_3851_axis_0, split_sizes = var_3851_split_sizes_0, x = normed_213_cast_fp16)[name = string("op_3851_cast_fp16")]; |
| tensor<fp16, [1536]> layers_8_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321921536)))]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_89_cast_fp16 = mul(x = var_3851_cast_fp16_0, y = layers_8_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_89_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_91_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("hidden_states_91_cast_fp16")]; |
| tensor<fp16, [1]> const_71_promoted_to_fp16 = const()[name = string("const_71_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.64p-1])]; |
| tensor<fp16, [1, 1, 1536]> x_143_cast_fp16 = mul(x = hidden_states_91_cast_fp16, y = const_71_promoted_to_fp16)[name = string("x_143_cast_fp16")]; |
| int32 var_3866 = const()[name = string("op_3866"), val = int32(-1)]; |
| fp16 const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_3868_cast_fp16 = mul(x = x_143_cast_fp16, y = const_72_promoted_to_fp16)[name = string("op_3868_cast_fp16")]; |
| bool input_217_interleave_0 = const()[name = string("input_217_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_217_cast_fp16 = concat(axis = var_3866, interleave = input_217_interleave_0, values = (x_143_cast_fp16, var_3868_cast_fp16))[name = string("input_217_cast_fp16")]; |
| tensor<int32, [1]> normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_3863_to_fp16 = const()[name = string("op_3863_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_3863_to_fp16, x = input_217_cast_fp16)[name = string("normed_217_cast_fp16")]; |
| tensor<int32, [2]> var_3873_split_sizes_0 = const()[name = string("op_3873_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_3873_axis_0 = const()[name = string("op_3873_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_3873_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_3873_cast_fp16_1 = split(axis = var_3873_axis_0, split_sizes = var_3873_split_sizes_0, x = normed_217_cast_fp16)[name = string("op_3873_cast_fp16")]; |
| tensor<fp16, [1536]> layers_9_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_input_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321924672)))]; |
| tensor<fp16, [1, 1, 1536]> h_55_cast_fp16 = mul(x = var_3873_cast_fp16_0, y = layers_9_input_layernorm_weight_promoted_to_fp16)[name = string("h_55_cast_fp16")]; |
| tensor<int32, [3]> var_3879 = const()[name = string("op_3879"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> var_3882_axes_0 = const()[name = string("op_3882_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_3880_cast_fp16 = transpose(perm = var_3879, x = h_55_cast_fp16)[name = string("transpose_11")]; |
| tensor<fp16, [1, 1536, 1, 1]> var_3882_cast_fp16 = expand_dims(axes = var_3882_axes_0, x = var_3880_cast_fp16)[name = string("op_3882_cast_fp16")]; |
| string var_3898_pad_type_0 = const()[name = string("op_3898_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> var_3898_strides_0 = const()[name = string("op_3898_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> var_3898_pad_0 = const()[name = string("op_3898_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> var_3898_dilations_0 = const()[name = string("op_3898_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 var_3898_groups_0 = const()[name = string("op_3898_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 4096, 1, 1]> var_3898 = conv(dilations = var_3898_dilations_0, groups = var_3898_groups_0, pad = var_3898_pad_0, pad_type = var_3898_pad_type_0, strides = var_3898_strides_0, weight = layers_9_self_attn_q_proj_weight_palettized, x = var_3882_cast_fp16)[name = string("op_3898")]; |
| tensor<int32, [4]> var_3903 = const()[name = string("op_3903"), val = tensor<int32, [4]>([1, 8, 512, 1])]; |
| tensor<fp16, [1, 8, 512, 1]> var_3904 = reshape(shape = var_3903, x = var_3898)[name = string("op_3904")]; |
| tensor<int32, [4]> var_3909 = const()[name = string("op_3909"), val = tensor<int32, [4]>([0, 1, 3, 2])]; |
| tensor<int32, [3]> var_3919 = const()[name = string("op_3919"), val = tensor<int32, [3]>([1, 8, 512])]; |
| tensor<fp16, [1, 8, 1, 512]> var_3910 = transpose(perm = var_3909, x = var_3904)[name = string("transpose_10")]; |
| tensor<fp16, [1, 8, 512]> x_145 = reshape(shape = var_3919, x = var_3910)[name = string("x_145")]; |
| int32 var_3925 = const()[name = string("op_3925"), val = int32(-1)]; |
| fp16 const_73_promoted = const()[name = string("const_73_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 512]> var_3927 = mul(x = x_145, y = const_73_promoted)[name = string("op_3927")]; |
| bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1024]> input_221 = concat(axis = var_3925, interleave = input_221_interleave_0, values = (x_145, var_3927))[name = string("input_221")]; |
| tensor<int32, [1]> normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_3922_to_fp16 = const()[name = string("op_3922_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 8, 1024]> normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_3922_to_fp16, x = input_221)[name = string("normed_221_cast_fp16")]; |
| tensor<int32, [2]> var_3932_split_sizes_0 = const()[name = string("op_3932_split_sizes_0"), val = tensor<int32, [2]>([512, 512])]; |
| int32 var_3932_axis_0 = const()[name = string("op_3932_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 512]> var_3932_0, tensor<fp16, [1, 8, 512]> var_3932_1 = split(axis = var_3932_axis_0, split_sizes = var_3932_split_sizes_0, x = normed_221_cast_fp16)[name = string("op_3932")]; |
| tensor<fp16, [1, 8, 512]> var_3934 = mul(x = var_3932_0, y = layers_4_self_attn_q_norm_weight)[name = string("op_3934")]; |
| tensor<int32, [4]> var_3939 = const()[name = string("op_3939"), val = tensor<int32, [4]>([1, 8, 1, 512])]; |
| tensor<fp16, [1, 8, 1, 512]> q_57 = reshape(shape = var_3939, x = var_3934)[name = string("q_57")]; |
| tensor<fp16, [1, 8, 1, 512]> var_3941_cast_fp16 = mul(x = q_57, y = cos_f)[name = string("op_3941_cast_fp16")]; |
| tensor<int32, [2]> var_3942_split_sizes_0 = const()[name = string("op_3942_split_sizes_0"), val = tensor<int32, [2]>([256, 256])]; |
| int32 var_3942_axis_0 = const()[name = string("op_3942_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 8, 1, 256]> var_3942_0, tensor<fp16, [1, 8, 1, 256]> var_3942_1 = split(axis = var_3942_axis_0, split_sizes = var_3942_split_sizes_0, x = q_57)[name = string("op_3942")]; |
| fp16 const_74_promoted = const()[name = string("const_74_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 8, 1, 256]> var_3944 = mul(x = var_3942_1, y = const_74_promoted)[name = string("op_3944")]; |
| int32 var_3946 = const()[name = string("op_3946"), val = int32(-1)]; |
| bool var_3947_interleave_0 = const()[name = string("op_3947_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 512]> var_3947 = concat(axis = var_3946, interleave = var_3947_interleave_0, values = (var_3944, var_3942_0))[name = string("op_3947")]; |
| tensor<fp16, [1, 8, 1, 512]> var_3948_cast_fp16 = mul(x = var_3947, y = sin_f)[name = string("op_3948_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 512]> q_cast_fp16 = add(x = var_3941_cast_fp16, y = var_3948_cast_fp16)[name = string("q_cast_fp16")]; |
| bool attn_weights_37_transpose_x_0 = const()[name = string("attn_weights_37_transpose_x_0"), val = bool(false)]; |
| bool attn_weights_37_transpose_y_0 = const()[name = string("attn_weights_37_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 2048]> attn_weights_37_cast_fp16 = matmul(transpose_x = attn_weights_37_transpose_x_0, transpose_y = attn_weights_37_transpose_y_0, x = q_cast_fp16, y = transpose_44_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; |
| tensor<fp16, [1, 8, 1, 2048]> x_147_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask_full)[name = string("x_147_cast_fp16")]; |
| tensor<int32, [1]> reduce_max_9_axes_0 = const()[name = string("reduce_max_9_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool reduce_max_9_keep_dims_0 = const()[name = string("reduce_max_9_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> reduce_max_9 = reduce_max(axes = reduce_max_9_axes_0, keep_dims = reduce_max_9_keep_dims_0, x = x_147_cast_fp16)[name = string("reduce_max_9")]; |
| tensor<fp16, [1, 8, 1, 2048]> var_3980 = sub(x = x_147_cast_fp16, y = reduce_max_9)[name = string("op_3980")]; |
| tensor<fp16, [1, 8, 1, 2048]> var_3986 = exp(x = var_3980)[name = string("op_3986")]; |
| tensor<int32, [1]> var_3996_axes_0 = const()[name = string("op_3996_axes_0"), val = tensor<int32, [1]>([-1])]; |
| bool var_3996_keep_dims_0 = const()[name = string("op_3996_keep_dims_0"), val = bool(true)]; |
| tensor<fp16, [1, 8, 1, 1]> var_3996 = reduce_sum(axes = var_3996_axes_0, keep_dims = var_3996_keep_dims_0, x = var_3986)[name = string("op_3996")]; |
| tensor<fp16, [1, 8, 1, 2048]> var_4002_cast_fp16 = real_div(x = var_3986, y = var_3996)[name = string("op_4002_cast_fp16")]; |
| bool attn_output_55_transpose_x_0 = const()[name = string("attn_output_55_transpose_x_0"), val = bool(false)]; |
| bool attn_output_55_transpose_y_0 = const()[name = string("attn_output_55_transpose_y_0"), val = bool(false)]; |
| tensor<fp16, [1, 8, 1, 512]> attn_output_55_cast_fp16 = matmul(transpose_x = attn_output_55_transpose_x_0, transpose_y = attn_output_55_transpose_y_0, x = var_4002_cast_fp16, y = V_expanded_9_cast_fp16)[name = string("attn_output_55_cast_fp16")]; |
| tensor<int32, [4]> var_4013 = const()[name = string("op_4013"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_4020 = const()[name = string("op_4020"), val = tensor<int32, [3]>([1, 1, -1])]; |
| tensor<fp16, [1, 1, 8, 512]> var_4014_cast_fp16 = transpose(perm = var_4013, x = attn_output_55_cast_fp16)[name = string("transpose_9")]; |
| tensor<fp16, [1, 1, 4096]> attn_output_57_cast_fp16 = reshape(shape = var_4020, x = var_4014_cast_fp16)[name = string("attn_output_57_cast_fp16")]; |
| tensor<int32, [3]> var_4025 = const()[name = string("op_4025"), val = tensor<int32, [3]>([0, 2, 1])]; |
| string var_4041_pad_type_0 = const()[name = string("op_4041_pad_type_0"), val = string("valid")]; |
| int32 var_4041_groups_0 = const()[name = string("op_4041_groups_0"), val = int32(1)]; |
| tensor<int32, [1]> var_4041_strides_0 = const()[name = string("op_4041_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> var_4041_pad_0 = const()[name = string("op_4041_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> var_4041_dilations_0 = const()[name = string("op_4041_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<fp16, [1536, 4096, 1]> squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 4096, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321927808))), lut = tensor<fp16, [48, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325073600))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; |
| tensor<fp16, [1, 4096, 1]> var_4026_cast_fp16 = transpose(perm = var_4025, x = attn_output_57_cast_fp16)[name = string("transpose_8")]; |
| tensor<fp16, [1, 1536, 1]> var_4041_cast_fp16 = conv(dilations = var_4041_dilations_0, groups = var_4041_groups_0, pad = var_4041_pad_0, pad_type = var_4041_pad_type_0, strides = var_4041_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_4026_cast_fp16)[name = string("op_4041_cast_fp16")]; |
| tensor<int32, [3]> var_4045 = const()[name = string("op_4045"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_4051 = const()[name = string("op_4051"), val = int32(-1)]; |
| fp16 const_75_promoted_to_fp16 = const()[name = string("const_75_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_151_cast_fp16 = transpose(perm = var_4045, x = var_4041_cast_fp16)[name = string("transpose_7")]; |
| tensor<fp16, [1, 1, 1536]> var_4053_cast_fp16 = mul(x = x_151_cast_fp16, y = const_75_promoted_to_fp16)[name = string("op_4053_cast_fp16")]; |
| bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_225_cast_fp16 = concat(axis = var_4051, interleave = input_225_interleave_0, values = (x_151_cast_fp16, var_4053_cast_fp16))[name = string("input_225_cast_fp16")]; |
| tensor<int32, [1]> normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_4048_to_fp16 = const()[name = string("op_4048_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_4048_to_fp16, x = input_225_cast_fp16)[name = string("normed_225_cast_fp16")]; |
| tensor<int32, [2]> var_4058_split_sizes_0 = const()[name = string("op_4058_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_4058_axis_0 = const()[name = string("op_4058_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_4058_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4058_cast_fp16_1 = split(axis = var_4058_axis_0, split_sizes = var_4058_split_sizes_0, x = normed_225_cast_fp16)[name = string("op_4058_cast_fp16")]; |
| tensor<fp16, [1536]> layers_9_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325075200)))]; |
| tensor<fp16, [1, 1, 1536]> attn_output_cast_fp16 = mul(x = var_4058_cast_fp16_0, y = layers_9_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> x_153_cast_fp16 = add(x = x_143_cast_fp16, y = attn_output_cast_fp16)[name = string("x_153_cast_fp16")]; |
| int32 var_4067 = const()[name = string("op_4067"), val = int32(-1)]; |
| fp16 const_76_promoted_to_fp16 = const()[name = string("const_76_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_4069_cast_fp16 = mul(x = x_153_cast_fp16, y = const_76_promoted_to_fp16)[name = string("op_4069_cast_fp16")]; |
| bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_227_cast_fp16 = concat(axis = var_4067, interleave = input_227_interleave_0, values = (x_153_cast_fp16, var_4069_cast_fp16))[name = string("input_227_cast_fp16")]; |
| tensor<int32, [1]> normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_4064_to_fp16 = const()[name = string("op_4064_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_4064_to_fp16, x = input_227_cast_fp16)[name = string("normed_229_cast_fp16")]; |
| tensor<int32, [2]> var_4074_split_sizes_0 = const()[name = string("op_4074_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_4074_axis_0 = const()[name = string("op_4074_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_4074_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4074_cast_fp16_1 = split(axis = var_4074_axis_0, split_sizes = var_4074_split_sizes_0, x = normed_229_cast_fp16)[name = string("op_4074_cast_fp16")]; |
| tensor<fp16, [1536]> layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325078336)))]; |
| tensor<fp16, [1, 1, 1536]> h_57_cast_fp16 = mul(x = var_4074_cast_fp16_0, y = layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_57_cast_fp16")]; |
| tensor<int32, [3]> var_4085 = const()[name = string("op_4085"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_229_axes_0 = const()[name = string("input_229_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_4086 = transpose(perm = var_4085, x = h_57_cast_fp16)[name = string("transpose_6")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_229 = expand_dims(axes = input_229_axes_0, x = var_4086)[name = string("input_229")]; |
| string gate_37_pad_type_0 = const()[name = string("gate_37_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gate_37_strides_0 = const()[name = string("gate_37_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gate_37_pad_0 = const()[name = string("gate_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gate_37_dilations_0 = const()[name = string("gate_37_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gate_37_groups_0 = const()[name = string("gate_37_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> gate_37 = conv(dilations = gate_37_dilations_0, groups = gate_37_groups_0, pad = gate_37_pad_0, pad_type = gate_37_pad_type_0, strides = gate_37_strides_0, weight = layers_9_mlp_gate_proj_weight_palettized, x = input_229)[name = string("gate_37")]; |
| string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> up_strides_0 = const()[name = string("up_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> up_pad_0 = const()[name = string("up_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> up_dilations_0 = const()[name = string("up_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 12288, 1, 1]> up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = layers_9_mlp_up_proj_weight_palettized, x = input_229)[name = string("up")]; |
| string gate_mode_0 = const()[name = string("gate_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 12288, 1, 1]> gate = gelu(mode = gate_mode_0, x = gate_37)[name = string("gate")]; |
| tensor<fp16, [1, 12288, 1, 1]> input_231 = mul(x = gate, y = up)[name = string("input_231")]; |
| string mlp_out_pad_type_0 = const()[name = string("mlp_out_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> mlp_out_strides_0 = const()[name = string("mlp_out_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> mlp_out_pad_0 = const()[name = string("mlp_out_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> mlp_out_dilations_0 = const()[name = string("mlp_out_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 mlp_out_groups_0 = const()[name = string("mlp_out_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 1536, 1, 1]> mlp_out = conv(dilations = mlp_out_dilations_0, groups = mlp_out_groups_0, pad = mlp_out_pad_0, pad_type = mlp_out_pad_type_0, strides = mlp_out_strides_0, weight = layers_9_mlp_down_proj_weight_palettized, x = input_231)[name = string("mlp_out")]; |
| tensor<int32, [1]> var_4126_axes_0 = const()[name = string("op_4126_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_4126 = squeeze(axes = var_4126_axes_0, x = mlp_out)[name = string("op_4126")]; |
| tensor<int32, [3]> var_4130 = const()[name = string("op_4130"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_4136 = const()[name = string("op_4136"), val = int32(-1)]; |
| fp16 const_77_promoted = const()[name = string("const_77_promoted"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_155 = transpose(perm = var_4130, x = var_4126)[name = string("transpose_5")]; |
| tensor<fp16, [1, 1, 1536]> var_4138 = mul(x = x_155, y = const_77_promoted)[name = string("op_4138")]; |
| bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_233 = concat(axis = var_4136, interleave = input_233_interleave_0, values = (x_155, var_4138))[name = string("input_233")]; |
| tensor<int32, [1]> normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_4133_to_fp16 = const()[name = string("op_4133_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_4133_to_fp16, x = input_233)[name = string("normed_233_cast_fp16")]; |
| tensor<int32, [2]> var_4143_split_sizes_0 = const()[name = string("op_4143_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_4143_axis_0 = const()[name = string("op_4143_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_4143_0, tensor<fp16, [1, 1, 1536]> var_4143_1 = split(axis = var_4143_axis_0, split_sizes = var_4143_split_sizes_0, x = normed_233_cast_fp16)[name = string("op_4143")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_93 = mul(x = var_4143_0, y = layers_9_post_feedforward_layernorm_weight)[name = string("hidden_states_93")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_95_cast_fp16 = add(x = x_153_cast_fp16, y = hidden_states_93)[name = string("hidden_states_95_cast_fp16")]; |
| tensor<int32, [3]> per_layer_slice_begin_0 = const()[name = string("per_layer_slice_begin_0"), val = tensor<int32, [3]>([0, 0, 8704])]; |
| tensor<int32, [3]> per_layer_slice_end_0 = const()[name = string("per_layer_slice_end_0"), val = tensor<int32, [3]>([1, 1, 1])]; |
| tensor<bool, [3]> per_layer_slice_end_mask_0 = const()[name = string("per_layer_slice_end_mask_0"), val = tensor<bool, [3]>([true, true, true])]; |
| tensor<fp16, [1, 1, 256]> per_layer_slice_cast_fp16 = slice_by_index(begin = per_layer_slice_begin_0, end = per_layer_slice_end_0, end_mask = per_layer_slice_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_cast_fp16")]; |
| tensor<int32, [3]> var_4171 = const()[name = string("op_4171"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> input_235_axes_0 = const()[name = string("input_235_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_4172 = transpose(perm = var_4171, x = hidden_states_95_cast_fp16)[name = string("transpose_4")]; |
| tensor<fp16, [1, 1536, 1, 1]> input_235 = expand_dims(axes = input_235_axes_0, x = var_4172)[name = string("input_235")]; |
| string gated_55_pad_type_0 = const()[name = string("gated_55_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_55_strides_0 = const()[name = string("gated_55_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_55_pad_0 = const()[name = string("gated_55_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_55_dilations_0 = const()[name = string("gated_55_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_55_groups_0 = const()[name = string("gated_55_groups_0"), val = int32(1)]; |
| tensor<fp16, [1, 256, 1, 1]> gated_55 = conv(dilations = gated_55_dilations_0, groups = gated_55_groups_0, pad = gated_55_pad_0, pad_type = gated_55_pad_type_0, strides = gated_55_strides_0, weight = layers_9_per_layer_input_gate_weight_palettized, x = input_235)[name = string("gated_55")]; |
| string gated_57_mode_0 = const()[name = string("gated_57_mode_0"), val = string("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 256, 1, 1]> gated_57 = gelu(mode = gated_57_mode_0, x = gated_55)[name = string("gated_57")]; |
| tensor<int32, [3]> var_4191 = const()[name = string("op_4191"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<int32, [1]> per_layer_slice_conv_axes_0 = const()[name = string("per_layer_slice_conv_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 256, 1]> var_4192_cast_fp16 = transpose(perm = var_4191, x = per_layer_slice_cast_fp16)[name = string("transpose_3")]; |
| tensor<fp16, [1, 256, 1, 1]> per_layer_slice_conv_cast_fp16 = expand_dims(axes = per_layer_slice_conv_axes_0, x = var_4192_cast_fp16)[name = string("per_layer_slice_conv_cast_fp16")]; |
| tensor<fp16, [1, 256, 1, 1]> input_237_cast_fp16 = mul(x = gated_57, y = per_layer_slice_conv_cast_fp16)[name = string("input_237_cast_fp16")]; |
| string gated_pad_type_0 = const()[name = string("gated_pad_type_0"), val = string("valid")]; |
| tensor<int32, [2]> gated_strides_0 = const()[name = string("gated_strides_0"), val = tensor<int32, [2]>([1, 1])]; |
| tensor<int32, [4]> gated_pad_0 = const()[name = string("gated_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])]; |
| tensor<int32, [2]> gated_dilations_0 = const()[name = string("gated_dilations_0"), val = tensor<int32, [2]>([1, 1])]; |
| int32 gated_groups_0 = const()[name = string("gated_groups_0"), val = int32(1)]; |
| tensor<fp16, [1536, 256, 1, 1]> layers_9_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1536, 256, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325081472))), lut = tensor<fp16, [48, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325278144))))[name = string("layers_9_per_layer_projection_weight_promoted_to_fp16_palettized")]; |
| tensor<fp16, [1, 1536, 1, 1]> gated_cast_fp16 = conv(dilations = gated_dilations_0, groups = gated_groups_0, pad = gated_pad_0, pad_type = gated_pad_type_0, strides = gated_strides_0, weight = layers_9_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_237_cast_fp16)[name = string("gated_cast_fp16")]; |
| tensor<int32, [1]> var_4208_axes_0 = const()[name = string("op_4208_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1536, 1]> var_4208_cast_fp16 = squeeze(axes = var_4208_axes_0, x = gated_cast_fp16)[name = string("op_4208_cast_fp16")]; |
| tensor<int32, [3]> var_4212 = const()[name = string("op_4212"), val = tensor<int32, [3]>([0, 2, 1])]; |
| int32 var_4218 = const()[name = string("op_4218"), val = int32(-1)]; |
| fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> x_157_cast_fp16 = transpose(perm = var_4212, x = var_4208_cast_fp16)[name = string("transpose_2")]; |
| tensor<fp16, [1, 1, 1536]> var_4220_cast_fp16 = mul(x = x_157_cast_fp16, y = const_78_promoted_to_fp16)[name = string("op_4220_cast_fp16")]; |
| bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_239_cast_fp16 = concat(axis = var_4218, interleave = input_239_interleave_0, values = (x_157_cast_fp16, var_4220_cast_fp16))[name = string("input_239_cast_fp16")]; |
| tensor<int32, [1]> normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_4215_to_fp16 = const()[name = string("op_4215_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_4215_to_fp16, x = input_239_cast_fp16)[name = string("normed_237_cast_fp16")]; |
| tensor<int32, [2]> var_4225_split_sizes_0 = const()[name = string("op_4225_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_4225_axis_0 = const()[name = string("op_4225_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_4225_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4225_cast_fp16_1 = split(axis = var_4225_axis_0, split_sizes = var_4225_split_sizes_0, x = normed_237_cast_fp16)[name = string("op_4225_cast_fp16")]; |
| tensor<fp16, [1536]> layers_9_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_9_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325279744)))]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_99_cast_fp16 = mul(x = var_4225_cast_fp16_0, y = layers_9_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_99_cast_fp16")]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_101_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; |
| tensor<fp16, [1]> const_79_promoted_to_fp16 = const()[name = string("const_79_promoted_to_fp16"), val = tensor<fp16, [1]>([0x1.56p-3])]; |
| tensor<fp16, [1, 1, 1536]> x_cast_fp16 = mul(x = hidden_states_101_cast_fp16, y = const_79_promoted_to_fp16)[name = string("x_cast_fp16")]; |
| int32 var_4240 = const()[name = string("op_4240"), val = int32(-1)]; |
| fp16 const_80_promoted_to_fp16 = const()[name = string("const_80_promoted_to_fp16"), val = fp16(-0x1p+0)]; |
| tensor<fp16, [1, 1, 1536]> var_4242_cast_fp16 = mul(x = x_cast_fp16, y = const_80_promoted_to_fp16)[name = string("op_4242_cast_fp16")]; |
| bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; |
| tensor<fp16, [1, 1, 3072]> input_241_cast_fp16 = concat(axis = var_4240, interleave = input_241_interleave_0, values = (x_cast_fp16, var_4242_cast_fp16))[name = string("input_241_cast_fp16")]; |
| tensor<int32, [1]> normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor<int32, [1]>([-1])]; |
| fp16 var_4237_to_fp16 = const()[name = string("op_4237_to_fp16"), val = fp16(0x1.1p-20)]; |
| tensor<fp16, [1, 1, 3072]> normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_4237_to_fp16, x = input_241_cast_fp16)[name = string("normed_241_cast_fp16")]; |
| tensor<int32, [2]> var_4247_split_sizes_0 = const()[name = string("op_4247_split_sizes_0"), val = tensor<int32, [2]>([1536, 1536])]; |
| int32 var_4247_axis_0 = const()[name = string("op_4247_axis_0"), val = int32(-1)]; |
| tensor<fp16, [1, 1, 1536]> var_4247_cast_fp16_0, tensor<fp16, [1, 1, 1536]> var_4247_cast_fp16_1 = split(axis = var_4247_axis_0, split_sizes = var_4247_split_sizes_0, x = normed_241_cast_fp16)[name = string("op_4247_cast_fp16")]; |
| tensor<fp16, [1536]> norm_weight_promoted_to_fp16 = const()[name = string("norm_weight_promoted_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325282880)))]; |
| tensor<fp16, [1, 1, 1536]> hidden_states_103_cast_fp16 = mul(x = var_4247_cast_fp16_0, y = norm_weight_promoted_to_fp16)[name = string("hidden_states_103_cast_fp16")]; |
| tensor<int32, [3]> var_4258 = const()[name = string("op_4258"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<fp16, [262144, 1536, 1]> squeeze_10_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [262144, 1536, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325286016))), lut = tensor<fp16, [8192, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526612672))))[name = string("squeeze_10_palettized")]; |
| string var_4274_pad_type_0 = const()[name = string("op_4274_pad_type_0"), val = string("valid")]; |
| int32 var_4274_groups_0 = const()[name = string("op_4274_groups_0"), val = int32(1)]; |
| tensor<int32, [1]> var_4274_strides_0 = const()[name = string("op_4274_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> var_4274_pad_0 = const()[name = string("op_4274_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> var_4274_dilations_0 = const()[name = string("op_4274_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<fp16, [1, 1536, 1]> var_4259 = transpose(perm = var_4258, x = hidden_states_103_cast_fp16)[name = string("transpose_1")]; |
| tensor<fp16, [1, 262144, 1]> var_4274 = conv(dilations = var_4274_dilations_0, groups = var_4274_groups_0, pad = var_4274_pad_0, pad_type = var_4274_pad_type_0, strides = var_4274_strides_0, weight = squeeze_10_palettized, x = var_4259)[name = string("op_4274")]; |
| tensor<int32, [3]> var_4278 = const()[name = string("op_4278"), val = tensor<int32, [3]>([0, 2, 1])]; |
| fp16 _inversed_4281_y_0_to_fp16 = const()[name = string("_inversed_4281_y_0_to_fp16"), val = fp16(0x1.11p-5)]; |
| tensor<fp16, [1, 1, 262144]> logits_1 = transpose(perm = var_4278, x = var_4274)[name = string("transpose_0")]; |
| tensor<fp16, [1, 1, 262144]> _inversed_4281_cast_fp16 = mul(x = logits_1, y = _inversed_4281_y_0_to_fp16)[name = string("_inversed_4281_cast_fp16")]; |
| tensor<fp16, [1, 1, 262144]> var_4282_cast_fp16 = tanh(x = _inversed_4281_cast_fp16)[name = string("op_4282_cast_fp16")]; |
| fp16 var_4283_to_fp16 = const()[name = string("op_4283_to_fp16"), val = fp16(0x1.ep+4)]; |
| tensor<fp16, [1, 1, 262144]> logits_3_cast_fp16 = mul(x = var_4282_cast_fp16, y = var_4283_to_fp16)[name = string("logits_3_cast_fp16")]; |
| tensor<int32, [1]> logits_axes_0 = const()[name = string("logits_axes_0"), val = tensor<int32, [1]>([0])]; |
| tensor<fp16, [1, 262144]> logits_cast_fp16 = squeeze(axes = logits_axes_0, x = logits_3_cast_fp16)[name = string("logits_cast_fp16")]; |
| int32 var_4288 = const()[name = string("op_4288"), val = int32(-1)]; |
| int32 token_id_axis_0 = const()[name = string("token_id_axis_0"), val = int32(-1)]; |
| bool token_id_keep_dims_0 = const()[name = string("token_id_keep_dims_0"), val = bool(false)]; |
| string token_id_output_dtype_0 = const()[name = string("token_id_output_dtype_0"), val = string("int32")]; |
| tensor<int32, [1]> token_id = reduce_argmax(axis = token_id_axis_0, keep_dims = token_id_keep_dims_0, output_dtype = token_id_output_dtype_0, x = logits_cast_fp16)[name = string("token_id_cast_fp16")]; |
| tensor<int32, [1]> var_4290_axes_0 = const()[name = string("op_4290_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<int32, [1, 1]> var_4290 = expand_dims(axes = var_4290_axes_0, x = token_id)[name = string("op_4290")]; |
| bool var_4291_validate_indices_0 = const()[name = string("op_4291_validate_indices_0"), val = bool(false)]; |
| tensor<fp16, [1, 1]> var_4291_cast_fp16 = gather_along_axis(axis = var_4288, indices = var_4290, validate_indices = var_4291_validate_indices_0, x = logits_cast_fp16)[name = string("op_4291_cast_fp16")]; |
| tensor<int32, [1]> var_4292_axes_0 = const()[name = string("op_4292_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [1]> token_logit = squeeze(axes = var_4292_axes_0, x = var_4291_cast_fp16)[name = string("op_4292_cast_fp16")]; |
| tensor<fp16, [1, 1, 2048, 1]> update_mask_tmp = identity(x = update_mask)[name = string("update_mask_tmp")]; |
| } -> (token_id, token_logit); |
| } |