seba commited on
Commit
65f369e
·
verified ·
1 Parent(s): 81bbd9f

Upload folder using huggingface_hub

Browse files
locdit_f16.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad769b50fccd64c66cff88a33a9885a5c00dab08ca80ebb9c7ddba3e6590ad81
3
+ size 243
locdit_f16.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a4db82642a3ccac7e9163ea22bc326306cfd8dd6e204d5f9c7db8322a1a0603
3
+ size 441
locdit_f16.mlmodelc/model.mil ADDED
@@ -0,0 +1,476 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.3)
2
+ [buildInfo = dict<string, string>({{"coremlc-component-MIL", "3405.2.1"}, {"coremlc-version", "3405.2.1"}, {"coremltools-component-torch", "2.8.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0b1"}})]
3
+ {
4
+ func main<ios18>(tensor<fp16, [2, 64, 2]> cond, tensor<fp16, [2]> dt, tensor<fp16, [2, 1024]> mu, tensor<fp16, [2]> t, tensor<fp16, [2, 64, 2]> x) {
5
+ int32 var_38 = const()[name = string("op_38"), val = int32(-1)];
6
+ tensor<int32, [1]> var_50_axes_0 = const()[name = string("op_50_axes_0"), val = tensor<int32, [1]>([1])];
7
+ tensor<fp16, [2, 1]> var_50_cast_fp16 = expand_dims(axes = var_50_axes_0, x = t)[name = string("op_50_cast_fp16")];
8
+ fp16 var_51_promoted_to_fp16 = const()[name = string("op_51_promoted_to_fp16"), val = fp16(0x1.f4p+9)];
9
+ tensor<fp16, [2, 1]> var_52_cast_fp16 = mul(x = var_50_cast_fp16, y = var_51_promoted_to_fp16)[name = string("op_52_cast_fp16")];
10
+ tensor<fp16, [1, 512]> var_53_to_fp16 = const()[name = string("op_53_to_fp16"), val = tensor<fp16, [1, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
11
+ tensor<fp16, [2, 512]> emb_3_cast_fp16 = mul(x = var_52_cast_fp16, y = var_53_to_fp16)[name = string("emb_3_cast_fp16")];
12
+ tensor<fp16, [2, 512]> var_55_cast_fp16 = sin(x = emb_3_cast_fp16)[name = string("op_55_cast_fp16")];
13
+ tensor<fp16, [2, 512]> var_56_cast_fp16 = cos(x = emb_3_cast_fp16)[name = string("op_56_cast_fp16")];
14
+ bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)];
15
+ tensor<fp16, [2, 1024]> input_1_cast_fp16 = concat(axis = var_38, interleave = input_1_interleave_0, values = (var_55_cast_fp16, var_56_cast_fp16))[name = string("input_1_cast_fp16")];
16
+ int32 var_59 = const()[name = string("op_59"), val = int32(-1)];
17
+ tensor<int32, [1]> var_71_axes_0 = const()[name = string("op_71_axes_0"), val = tensor<int32, [1]>([1])];
18
+ tensor<fp16, [2, 1]> var_71_cast_fp16 = expand_dims(axes = var_71_axes_0, x = dt)[name = string("op_71_cast_fp16")];
19
+ fp16 var_72_promoted_to_fp16 = const()[name = string("op_72_promoted_to_fp16"), val = fp16(0x1.f4p+9)];
20
+ tensor<fp16, [2, 1]> var_73_cast_fp16 = mul(x = var_71_cast_fp16, y = var_72_promoted_to_fp16)[name = string("op_73_cast_fp16")];
21
+ tensor<fp16, [2, 512]> emb_cast_fp16 = mul(x = var_73_cast_fp16, y = var_53_to_fp16)[name = string("emb_cast_fp16")];
22
+ tensor<fp16, [2, 512]> var_76_cast_fp16 = sin(x = emb_cast_fp16)[name = string("op_76_cast_fp16")];
23
+ tensor<fp16, [2, 512]> var_77_cast_fp16 = cos(x = emb_cast_fp16)[name = string("op_77_cast_fp16")];
24
+ bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)];
25
+ tensor<fp16, [2, 1024]> input_7_cast_fp16 = concat(axis = var_59, interleave = input_7_interleave_0, values = (var_76_cast_fp16, var_77_cast_fp16))[name = string("input_7_cast_fp16")];
26
+ tensor<fp16, [1024, 1024]> layer_time_mlp_linear_1_weight_to_fp16 = const()[name = string("layer_time_mlp_linear_1_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1152)))];
27
+ tensor<fp16, [1024]> layer_time_mlp_linear_1_bias_to_fp16 = const()[name = string("layer_time_mlp_linear_1_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2098368)))];
28
+ tensor<fp16, [2, 1024]> linear_0_cast_fp16 = linear(bias = layer_time_mlp_linear_1_bias_to_fp16, weight = layer_time_mlp_linear_1_weight_to_fp16, x = input_1_cast_fp16)[name = string("linear_0_cast_fp16")];
29
+ tensor<fp16, [2, 1024]> input_5_cast_fp16 = silu(x = linear_0_cast_fp16)[name = string("input_5_cast_fp16")];
30
+ tensor<fp16, [1024, 1024]> layer_time_mlp_linear_2_weight_to_fp16 = const()[name = string("layer_time_mlp_linear_2_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2100480)))];
31
+ tensor<fp16, [1024]> layer_time_mlp_linear_2_bias_to_fp16 = const()[name = string("layer_time_mlp_linear_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4197696)))];
32
+ tensor<fp16, [2, 1024]> linear_1_cast_fp16 = linear(bias = layer_time_mlp_linear_2_bias_to_fp16, weight = layer_time_mlp_linear_2_weight_to_fp16, x = input_5_cast_fp16)[name = string("linear_1_cast_fp16")];
33
+ tensor<fp16, [1024, 1024]> layer_delta_time_mlp_linear_1_weight_to_fp16 = const()[name = string("layer_delta_time_mlp_linear_1_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4199808)))];
34
+ tensor<fp16, [1024]> layer_delta_time_mlp_linear_1_bias_to_fp16 = const()[name = string("layer_delta_time_mlp_linear_1_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6297024)))];
35
+ tensor<fp16, [2, 1024]> linear_2_cast_fp16 = linear(bias = layer_delta_time_mlp_linear_1_bias_to_fp16, weight = layer_delta_time_mlp_linear_1_weight_to_fp16, x = input_7_cast_fp16)[name = string("linear_2_cast_fp16")];
36
+ tensor<fp16, [2, 1024]> input_11_cast_fp16 = silu(x = linear_2_cast_fp16)[name = string("input_11_cast_fp16")];
37
+ tensor<fp16, [1024, 1024]> layer_delta_time_mlp_linear_2_weight_to_fp16 = const()[name = string("layer_delta_time_mlp_linear_2_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6299136)))];
38
+ tensor<fp16, [1024]> layer_delta_time_mlp_linear_2_bias_to_fp16 = const()[name = string("layer_delta_time_mlp_linear_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8396352)))];
39
+ tensor<fp16, [2, 1024]> linear_3_cast_fp16 = linear(bias = layer_delta_time_mlp_linear_2_bias_to_fp16, weight = layer_delta_time_mlp_linear_2_weight_to_fp16, x = input_11_cast_fp16)[name = string("linear_3_cast_fp16")];
40
+ tensor<fp16, [2, 1024]> t_cast_fp16 = add(x = linear_1_cast_fp16, y = linear_3_cast_fp16)[name = string("t_cast_fp16")];
41
+ tensor<int32, [3]> var_119 = const()[name = string("op_119"), val = tensor<int32, [3]>([0, 2, 1])];
42
+ tensor<fp16, [1024, 64]> layer_in_proj_weight_to_fp16 = const()[name = string("layer_in_proj_weight_to_fp16"), val = tensor<fp16, [1024, 64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8398464)))];
43
+ tensor<fp16, [1024]> layer_in_proj_bias_to_fp16 = const()[name = string("layer_in_proj_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8529600)))];
44
+ tensor<fp16, [2, 2, 64]> input_13_cast_fp16 = transpose(perm = var_119, x = x)[name = string("transpose_18")];
45
+ tensor<fp16, [2, 2, 1024]> linear_4_cast_fp16 = linear(bias = layer_in_proj_bias_to_fp16, weight = layer_in_proj_weight_to_fp16, x = input_13_cast_fp16)[name = string("linear_4_cast_fp16")];
46
+ tensor<int32, [3]> input_15_perm_0 = const()[name = string("input_15_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
47
+ tensor<fp16, [1024, 64]> layer_cond_proj_weight_to_fp16 = const()[name = string("layer_cond_proj_weight_to_fp16"), val = tensor<fp16, [1024, 64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8531712)))];
48
+ tensor<fp16, [1024]> layer_cond_proj_bias_to_fp16 = const()[name = string("layer_cond_proj_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8662848)))];
49
+ tensor<fp16, [2, 2, 64]> input_15_cast_fp16 = transpose(perm = input_15_perm_0, x = cond)[name = string("transpose_17")];
50
+ tensor<fp16, [2, 2, 1024]> linear_5_cast_fp16 = linear(bias = layer_cond_proj_bias_to_fp16, weight = layer_cond_proj_weight_to_fp16, x = input_15_cast_fp16)[name = string("linear_5_cast_fp16")];
51
+ tensor<fp16, [2, 1024]> var_131_cast_fp16 = add(x = mu, y = t_cast_fp16)[name = string("op_131_cast_fp16")];
52
+ tensor<int32, [1]> var_133_axes_0 = const()[name = string("op_133_axes_0"), val = tensor<int32, [1]>([1])];
53
+ tensor<fp16, [2, 1, 1024]> var_133_cast_fp16 = expand_dims(axes = var_133_axes_0, x = var_131_cast_fp16)[name = string("op_133_cast_fp16")];
54
+ int32 var_135 = const()[name = string("op_135"), val = int32(1)];
55
+ bool x_interleave_0 = const()[name = string("x_interleave_0"), val = bool(false)];
56
+ tensor<fp16, [2, 5, 1024]> x_cast_fp16 = concat(axis = var_135, interleave = x_interleave_0, values = (var_133_cast_fp16, linear_5_cast_fp16, linear_4_cast_fp16))[name = string("x_cast_fp16")];
57
+ int32 var_137 = const()[name = string("op_137"), val = int32(1)];
58
+ int32 var_142 = const()[name = string("op_142"), val = int32(-1)];
59
+ fp16 const_5_promoted_to_fp16 = const()[name = string("const_5_promoted_to_fp16"), val = fp16(-0x1p+0)];
60
+ tensor<fp16, [2, 5, 1024]> var_156_cast_fp16 = mul(x = x_cast_fp16, y = const_5_promoted_to_fp16)[name = string("op_156_cast_fp16")];
61
+ bool hidden_states_1_interleave_0 = const()[name = string("hidden_states_1_interleave_0"), val = bool(false)];
62
+ tensor<fp16, [2, 5, 2048]> hidden_states_1_cast_fp16 = concat(axis = var_142, interleave = hidden_states_1_interleave_0, values = (x_cast_fp16, var_156_cast_fp16))[name = string("hidden_states_1_cast_fp16")];
63
+ tensor<int32, [1]> hidden_states_3_axes_0 = const()[name = string("hidden_states_3_axes_0"), val = tensor<int32, [1]>([-1])];
64
+ tensor<fp16, [2048]> weight_1_to_fp16 = const()[name = string("weight_1_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8664960)))];
65
+ fp16 var_145_to_fp16 = const()[name = string("op_145_to_fp16"), val = fp16(0x1.5p-17)];
66
+ tensor<fp16, [2, 5, 2048]> hidden_states_3_cast_fp16 = layer_norm(axes = hidden_states_3_axes_0, epsilon = var_145_to_fp16, gamma = weight_1_to_fp16, x = hidden_states_1_cast_fp16)[name = string("hidden_states_3_cast_fp16")];
67
+ tensor<int32, [2]> var_164_split_sizes_0 = const()[name = string("op_164_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
68
+ int32 var_164_axis_0 = const()[name = string("op_164_axis_0"), val = int32(-1)];
69
+ tensor<fp16, [2, 5, 1024]> var_164_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_164_cast_fp16_1 = split(axis = var_164_axis_0, split_sizes = var_164_split_sizes_0, x = hidden_states_3_cast_fp16)[name = string("op_164_cast_fp16")];
70
+ tensor<fp16, [1024, 1024]> layer_decoder_layers_0_self_attn_layer_q_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_0_self_attn_layer_q_proj_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8669120)))];
71
+ tensor<fp16, [1024]> linear_6_bias_0_to_fp16 = const()[name = string("linear_6_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10766336)))];
72
+ tensor<fp16, [2, 5, 1024]> linear_6_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_0_self_attn_layer_q_proj_weight_to_fp16, x = var_164_cast_fp16_0)[name = string("linear_6_cast_fp16")];
73
+ tensor<fp16, [128, 1024]> layer_decoder_layers_0_self_attn_layer_k_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_0_self_attn_layer_k_proj_weight_to_fp16"), val = tensor<fp16, [128, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10768448)))];
74
+ tensor<fp16, [128]> linear_7_bias_0_to_fp16 = const()[name = string("linear_7_bias_0_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11030656)))];
75
+ tensor<fp16, [2, 5, 128]> linear_7_cast_fp16 = linear(bias = linear_7_bias_0_to_fp16, weight = layer_decoder_layers_0_self_attn_layer_k_proj_weight_to_fp16, x = var_164_cast_fp16_0)[name = string("linear_7_cast_fp16")];
76
+ tensor<fp16, [128, 1024]> layer_decoder_layers_0_self_attn_layer_v_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_0_self_attn_layer_v_proj_weight_to_fp16"), val = tensor<fp16, [128, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11030976)))];
77
+ tensor<fp16, [2, 5, 128]> linear_8_cast_fp16 = linear(bias = linear_7_bias_0_to_fp16, weight = layer_decoder_layers_0_self_attn_layer_v_proj_weight_to_fp16, x = var_164_cast_fp16_0)[name = string("linear_8_cast_fp16")];
78
+ tensor<int32, [4]> var_183 = const()[name = string("op_183"), val = tensor<int32, [4]>([2, 5, 16, 64])];
79
+ tensor<fp16, [2, 5, 16, 64]> var_184_cast_fp16 = reshape(shape = var_183, x = linear_6_cast_fp16)[name = string("op_184_cast_fp16")];
80
+ tensor<int32, [4]> q_1_perm_0 = const()[name = string("q_1_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
81
+ tensor<int32, [4]> var_186 = const()[name = string("op_186"), val = tensor<int32, [4]>([2, 5, 2, 64])];
82
+ tensor<fp16, [2, 5, 2, 64]> var_187_cast_fp16 = reshape(shape = var_186, x = linear_7_cast_fp16)[name = string("op_187_cast_fp16")];
83
+ tensor<int32, [4]> k_1_perm_0 = const()[name = string("k_1_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
84
+ tensor<int32, [4]> var_189 = const()[name = string("op_189"), val = tensor<int32, [4]>([2, 5, 2, 64])];
85
+ tensor<fp16, [2, 5, 2, 64]> var_190_cast_fp16 = reshape(shape = var_189, x = linear_8_cast_fp16)[name = string("op_190_cast_fp16")];
86
+ tensor<int32, [4]> value_states_3_perm_0 = const()[name = string("value_states_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
87
+ tensor<fp16, [5, 64]> cos_to_fp16 = const()[name = string("cos_to_fp16"), val = tensor<fp16, [5, 64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11293184)))];
88
+ tensor<fp16, [2, 16, 5, 64]> q_1_cast_fp16 = transpose(perm = q_1_perm_0, x = var_184_cast_fp16)[name = string("transpose_16")];
89
+ tensor<fp16, [2, 16, 5, 64]> var_194_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_to_fp16)[name = string("op_194_cast_fp16")];
90
+ tensor<int32, [2]> var_195_split_sizes_0 = const()[name = string("op_195_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
91
+ int32 var_195_axis_0 = const()[name = string("op_195_axis_0"), val = int32(-1)];
92
+ tensor<fp16, [2, 16, 5, 32]> var_195_cast_fp16_0, tensor<fp16, [2, 16, 5, 32]> var_195_cast_fp16_1 = split(axis = var_195_axis_0, split_sizes = var_195_split_sizes_0, x = q_1_cast_fp16)[name = string("op_195_cast_fp16")];
93
+ fp16 const_9_promoted_to_fp16 = const()[name = string("const_9_promoted_to_fp16"), val = fp16(-0x1p+0)];
94
+ tensor<fp16, [2, 16, 5, 32]> var_197_cast_fp16 = mul(x = var_195_cast_fp16_1, y = const_9_promoted_to_fp16)[name = string("op_197_cast_fp16")];
95
+ bool var_199_interleave_0 = const()[name = string("op_199_interleave_0"), val = bool(false)];
96
+ tensor<fp16, [2, 16, 5, 64]> var_199_cast_fp16 = concat(axis = var_142, interleave = var_199_interleave_0, values = (var_197_cast_fp16, var_195_cast_fp16_0))[name = string("op_199_cast_fp16")];
97
+ tensor<fp16, [5, 64]> sin_to_fp16 = const()[name = string("sin_to_fp16"), val = tensor<fp16, [5, 64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11293888)))];
98
+ tensor<fp16, [2, 16, 5, 64]> var_200_cast_fp16 = mul(x = var_199_cast_fp16, y = sin_to_fp16)[name = string("op_200_cast_fp16")];
99
+ tensor<fp16, [2, 16, 5, 64]> q_embed_1_cast_fp16 = add(x = var_194_cast_fp16, y = var_200_cast_fp16)[name = string("q_embed_1_cast_fp16")];
100
+ tensor<fp16, [2, 2, 5, 64]> k_1_cast_fp16 = transpose(perm = k_1_perm_0, x = var_187_cast_fp16)[name = string("transpose_15")];
101
+ tensor<fp16, [2, 2, 5, 64]> var_202_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_to_fp16)[name = string("op_202_cast_fp16")];
102
+ tensor<int32, [2]> var_203_split_sizes_0 = const()[name = string("op_203_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
103
+ int32 var_203_axis_0 = const()[name = string("op_203_axis_0"), val = int32(-1)];
104
+ tensor<fp16, [2, 2, 5, 32]> var_203_cast_fp16_0, tensor<fp16, [2, 2, 5, 32]> var_203_cast_fp16_1 = split(axis = var_203_axis_0, split_sizes = var_203_split_sizes_0, x = k_1_cast_fp16)[name = string("op_203_cast_fp16")];
105
+ fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)];
106
+ tensor<fp16, [2, 2, 5, 32]> var_205_cast_fp16 = mul(x = var_203_cast_fp16_1, y = const_10_promoted_to_fp16)[name = string("op_205_cast_fp16")];
107
+ bool var_207_interleave_0 = const()[name = string("op_207_interleave_0"), val = bool(false)];
108
+ tensor<fp16, [2, 2, 5, 64]> var_207_cast_fp16 = concat(axis = var_142, interleave = var_207_interleave_0, values = (var_205_cast_fp16, var_203_cast_fp16_0))[name = string("op_207_cast_fp16")];
109
+ tensor<fp16, [2, 2, 5, 64]> var_208_cast_fp16 = mul(x = var_207_cast_fp16, y = sin_to_fp16)[name = string("op_208_cast_fp16")];
110
+ tensor<fp16, [2, 2, 5, 64]> k_embed_1_cast_fp16 = add(x = var_202_cast_fp16, y = var_208_cast_fp16)[name = string("k_embed_1_cast_fp16")];
111
+ tensor<int32, [2]> var_215_split_sizes_0 = const()[name = string("op_215_split_sizes_0"), val = tensor<int32, [2]>([8, 8])];
112
+ int32 var_215_axis_0 = const()[name = string("op_215_axis_0"), val = int32(1)];
113
+ tensor<fp16, [2, 8, 5, 64]> var_215_cast_fp16_0, tensor<fp16, [2, 8, 5, 64]> var_215_cast_fp16_1 = split(axis = var_215_axis_0, split_sizes = var_215_split_sizes_0, x = q_embed_1_cast_fp16)[name = string("op_215_cast_fp16")];
114
+ tensor<int32, [2]> var_217_split_sizes_0 = const()[name = string("op_217_split_sizes_0"), val = tensor<int32, [2]>([1, 1])];
115
+ int32 var_217_axis_0 = const()[name = string("op_217_axis_0"), val = int32(1)];
116
+ tensor<fp16, [2, 1, 5, 64]> var_217_cast_fp16_0, tensor<fp16, [2, 1, 5, 64]> var_217_cast_fp16_1 = split(axis = var_217_axis_0, split_sizes = var_217_split_sizes_0, x = k_embed_1_cast_fp16)[name = string("op_217_cast_fp16")];
117
+ tensor<int32, [2]> var_219_split_sizes_0 = const()[name = string("op_219_split_sizes_0"), val = tensor<int32, [2]>([1, 1])];
118
+ int32 var_219_axis_0 = const()[name = string("op_219_axis_0"), val = int32(1)];
119
+ tensor<fp16, [2, 2, 5, 64]> value_states_3_cast_fp16 = transpose(perm = value_states_3_perm_0, x = var_190_cast_fp16)[name = string("transpose_14")];
120
+ tensor<fp16, [2, 1, 5, 64]> var_219_cast_fp16_0, tensor<fp16, [2, 1, 5, 64]> var_219_cast_fp16_1 = split(axis = var_219_axis_0, split_sizes = var_219_split_sizes_0, x = value_states_3_cast_fp16)[name = string("op_219_cast_fp16")];
121
+ tensor<int32, [4]> var_217_0_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_217_0_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
122
+ tensor<fp16, [2, 8, 5, 64]> var_217_0_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_217_0_broadcast_to_same_batch_dims_reps_0, x = var_217_cast_fp16_0)[name = string("op_217_0_broadcast_to_same_batch_dims_cast_fp16")];
123
+ tensor<int32, [4]> var_219_0_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_219_0_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
124
+ tensor<fp16, [2, 8, 5, 64]> var_219_0_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_219_0_broadcast_to_same_batch_dims_reps_0, x = var_219_cast_fp16_0)[name = string("op_219_0_broadcast_to_same_batch_dims_cast_fp16")];
125
+ tensor<fp16, [2, 8, 5, 64]> var_221_cast_fp16 = scaled_dot_product_attention(key = var_217_0_broadcast_to_same_batch_dims_cast_fp16, query = var_215_cast_fp16_0, value = var_219_0_broadcast_to_same_batch_dims_cast_fp16)[name = string("op_221_cast_fp16")];
126
+ tensor<int32, [4]> var_217_1_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_217_1_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
127
+ tensor<fp16, [2, 8, 5, 64]> var_217_1_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_217_1_broadcast_to_same_batch_dims_reps_0, x = var_217_cast_fp16_1)[name = string("op_217_1_broadcast_to_same_batch_dims_cast_fp16")];
128
+ tensor<int32, [4]> var_219_1_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_219_1_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
129
+ tensor<fp16, [2, 8, 5, 64]> var_219_1_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_219_1_broadcast_to_same_batch_dims_reps_0, x = var_219_cast_fp16_1)[name = string("op_219_1_broadcast_to_same_batch_dims_cast_fp16")];
130
+ tensor<fp16, [2, 8, 5, 64]> attn_output_1_cast_fp16 = scaled_dot_product_attention(key = var_217_1_broadcast_to_same_batch_dims_cast_fp16, query = var_215_cast_fp16_1, value = var_219_1_broadcast_to_same_batch_dims_cast_fp16)[name = string("attn_output_1_cast_fp16")];
131
+ bool attn_output_3_interleave_0 = const()[name = string("attn_output_3_interleave_0"), val = bool(false)];
132
+ tensor<fp16, [2, 16, 5, 64]> attn_output_3_cast_fp16 = concat(axis = var_137, interleave = attn_output_3_interleave_0, values = (var_221_cast_fp16, attn_output_1_cast_fp16))[name = string("attn_output_3_cast_fp16")];
133
+ tensor<int32, [4]> var_225_perm_0 = const()[name = string("op_225_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
134
+ tensor<int32, [3]> var_227 = const()[name = string("op_227"), val = tensor<int32, [3]>([2, 5, 1024])];
135
+ tensor<fp16, [2, 5, 16, 64]> var_225_cast_fp16 = transpose(perm = var_225_perm_0, x = attn_output_3_cast_fp16)[name = string("transpose_13")];
136
+ tensor<fp16, [2, 5, 1024]> input_17_cast_fp16 = reshape(shape = var_227, x = var_225_cast_fp16)[name = string("input_17_cast_fp16")];
137
+ tensor<fp16, [1024, 1024]> layer_decoder_layers_0_self_attn_layer_o_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_0_self_attn_layer_o_proj_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11294592)))];
138
+ tensor<fp16, [2, 5, 1024]> linear_9_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_0_self_attn_layer_o_proj_weight_to_fp16, x = input_17_cast_fp16)[name = string("linear_9_cast_fp16")];
139
+ tensor<fp16, [2, 5, 1024]> hidden_states_9_cast_fp16 = add(x = x_cast_fp16, y = linear_9_cast_fp16)[name = string("hidden_states_9_cast_fp16")];
140
+ fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)];
141
+ tensor<fp16, [2, 5, 1024]> var_234_cast_fp16 = mul(x = hidden_states_9_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_234_cast_fp16")];
142
+ bool hidden_states_11_interleave_0 = const()[name = string("hidden_states_11_interleave_0"), val = bool(false)];
143
+ tensor<fp16, [2, 5, 2048]> hidden_states_11_cast_fp16 = concat(axis = var_142, interleave = hidden_states_11_interleave_0, values = (hidden_states_9_cast_fp16, var_234_cast_fp16))[name = string("hidden_states_11_cast_fp16")];
144
+ tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
145
+ tensor<fp16, [2048]> weight_3_to_fp16 = const()[name = string("weight_3_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13391808)))];
146
+ tensor<fp16, [2, 5, 2048]> hidden_states_13_cast_fp16 = layer_norm(axes = hidden_states_13_axes_0, epsilon = var_145_to_fp16, gamma = weight_3_to_fp16, x = hidden_states_11_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
147
+ tensor<int32, [2]> var_242_split_sizes_0 = const()[name = string("op_242_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
148
+ int32 var_242_axis_0 = const()[name = string("op_242_axis_0"), val = int32(-1)];
149
+ tensor<fp16, [2, 5, 1024]> var_242_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_242_cast_fp16_1 = split(axis = var_242_axis_0, split_sizes = var_242_split_sizes_0, x = hidden_states_13_cast_fp16)[name = string("op_242_cast_fp16")];
150
+ tensor<fp16, [4096, 1024]> layer_decoder_layers_0_mlp_gate_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_0_mlp_gate_proj_weight_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13395968)))];
151
+ tensor<fp16, [4096]> linear_10_bias_0_to_fp16 = const()[name = string("linear_10_bias_0_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21784640)))];
152
+ tensor<fp16, [2, 5, 4096]> linear_10_cast_fp16 = linear(bias = linear_10_bias_0_to_fp16, weight = layer_decoder_layers_0_mlp_gate_proj_weight_to_fp16, x = var_242_cast_fp16_0)[name = string("linear_10_cast_fp16")];
153
+ tensor<fp16, [2, 5, 4096]> var_250_cast_fp16 = silu(x = linear_10_cast_fp16)[name = string("op_250_cast_fp16")];
154
+ tensor<fp16, [4096, 1024]> layer_decoder_layers_0_mlp_up_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_0_mlp_up_proj_weight_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21792896)))];
155
+ tensor<fp16, [2, 5, 4096]> linear_11_cast_fp16 = linear(bias = linear_10_bias_0_to_fp16, weight = layer_decoder_layers_0_mlp_up_proj_weight_to_fp16, x = var_242_cast_fp16_0)[name = string("linear_11_cast_fp16")];
156
+ tensor<fp16, [2, 5, 4096]> input_23_cast_fp16 = mul(x = var_250_cast_fp16, y = linear_11_cast_fp16)[name = string("input_23_cast_fp16")];
157
+ tensor<fp16, [1024, 4096]> layer_decoder_layers_0_mlp_down_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_0_mlp_down_proj_weight_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30181568)))];
158
+ tensor<fp16, [2, 5, 1024]> linear_12_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_0_mlp_down_proj_weight_to_fp16, x = input_23_cast_fp16)[name = string("linear_12_cast_fp16")];
159
+ tensor<fp16, [2, 5, 1024]> hidden_states_17_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_12_cast_fp16)[name = string("hidden_states_17_cast_fp16")];
160
+ int32 var_257 = const()[name = string("op_257"), val = int32(1)];
161
+ int32 var_262 = const()[name = string("op_262"), val = int32(-1)];
162
+ fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)];
163
+ tensor<fp16, [2, 5, 1024]> var_276_cast_fp16 = mul(x = hidden_states_17_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_276_cast_fp16")];
164
+ bool hidden_states_19_interleave_0 = const()[name = string("hidden_states_19_interleave_0"), val = bool(false)];
165
+ tensor<fp16, [2, 5, 2048]> hidden_states_19_cast_fp16 = concat(axis = var_262, interleave = hidden_states_19_interleave_0, values = (hidden_states_17_cast_fp16, var_276_cast_fp16))[name = string("hidden_states_19_cast_fp16")];
166
+ tensor<int32, [1]> hidden_states_21_axes_0 = const()[name = string("hidden_states_21_axes_0"), val = tensor<int32, [1]>([-1])];
167
+ tensor<fp16, [2048]> weight_5_to_fp16 = const()[name = string("weight_5_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38570240)))];
168
+ fp16 var_265_to_fp16 = const()[name = string("op_265_to_fp16"), val = fp16(0x1.5p-17)];
169
+ tensor<fp16, [2, 5, 2048]> hidden_states_21_cast_fp16 = layer_norm(axes = hidden_states_21_axes_0, epsilon = var_265_to_fp16, gamma = weight_5_to_fp16, x = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
170
+ tensor<int32, [2]> var_284_split_sizes_0 = const()[name = string("op_284_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
171
+ int32 var_284_axis_0 = const()[name = string("op_284_axis_0"), val = int32(-1)];
172
+ tensor<fp16, [2, 5, 1024]> var_284_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_284_cast_fp16_1 = split(axis = var_284_axis_0, split_sizes = var_284_split_sizes_0, x = hidden_states_21_cast_fp16)[name = string("op_284_cast_fp16")];
173
+ tensor<fp16, [1024, 1024]> layer_decoder_layers_1_self_attn_layer_q_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_1_self_attn_layer_q_proj_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38574400)))];
174
+ tensor<fp16, [2, 5, 1024]> linear_13_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_1_self_attn_layer_q_proj_weight_to_fp16, x = var_284_cast_fp16_0)[name = string("linear_13_cast_fp16")];
175
+ tensor<fp16, [128, 1024]> layer_decoder_layers_1_self_attn_layer_k_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_1_self_attn_layer_k_proj_weight_to_fp16"), val = tensor<fp16, [128, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40671616)))];
176
+ tensor<fp16, [2, 5, 128]> linear_14_cast_fp16 = linear(bias = linear_7_bias_0_to_fp16, weight = layer_decoder_layers_1_self_attn_layer_k_proj_weight_to_fp16, x = var_284_cast_fp16_0)[name = string("linear_14_cast_fp16")];
177
+ tensor<fp16, [128, 1024]> layer_decoder_layers_1_self_attn_layer_v_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_1_self_attn_layer_v_proj_weight_to_fp16"), val = tensor<fp16, [128, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40933824)))];
178
+ tensor<fp16, [2, 5, 128]> linear_15_cast_fp16 = linear(bias = linear_7_bias_0_to_fp16, weight = layer_decoder_layers_1_self_attn_layer_v_proj_weight_to_fp16, x = var_284_cast_fp16_0)[name = string("linear_15_cast_fp16")];
179
+ tensor<int32, [4]> var_303 = const()[name = string("op_303"), val = tensor<int32, [4]>([2, 5, 16, 64])];
180
+ tensor<fp16, [2, 5, 16, 64]> var_304_cast_fp16 = reshape(shape = var_303, x = linear_13_cast_fp16)[name = string("op_304_cast_fp16")];
181
+ tensor<int32, [4]> q_9_perm_0 = const()[name = string("q_9_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
182
+ tensor<int32, [4]> var_306 = const()[name = string("op_306"), val = tensor<int32, [4]>([2, 5, 2, 64])];
183
+ tensor<fp16, [2, 5, 2, 64]> var_307_cast_fp16 = reshape(shape = var_306, x = linear_14_cast_fp16)[name = string("op_307_cast_fp16")];
184
+ tensor<int32, [4]> k_9_perm_0 = const()[name = string("k_9_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
185
+ tensor<int32, [4]> var_309 = const()[name = string("op_309"), val = tensor<int32, [4]>([2, 5, 2, 64])];
186
+ tensor<fp16, [2, 5, 2, 64]> var_310_cast_fp16 = reshape(shape = var_309, x = linear_15_cast_fp16)[name = string("op_310_cast_fp16")];
187
+ tensor<int32, [4]> value_states_9_perm_0 = const()[name = string("value_states_9_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
188
+ tensor<fp16, [2, 16, 5, 64]> q_9_cast_fp16 = transpose(perm = q_9_perm_0, x = var_304_cast_fp16)[name = string("transpose_12")];
189
+ tensor<fp16, [2, 16, 5, 64]> var_314_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_to_fp16)[name = string("op_314_cast_fp16")];
190
+ tensor<int32, [2]> var_315_split_sizes_0 = const()[name = string("op_315_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
191
+ int32 var_315_axis_0 = const()[name = string("op_315_axis_0"), val = int32(-1)];
192
+ tensor<fp16, [2, 16, 5, 32]> var_315_cast_fp16_0, tensor<fp16, [2, 16, 5, 32]> var_315_cast_fp16_1 = split(axis = var_315_axis_0, split_sizes = var_315_split_sizes_0, x = q_9_cast_fp16)[name = string("op_315_cast_fp16")];
193
+ fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)];
194
+ tensor<fp16, [2, 16, 5, 32]> var_317_cast_fp16 = mul(x = var_315_cast_fp16_1, y = const_17_promoted_to_fp16)[name = string("op_317_cast_fp16")];
195
+ bool var_319_interleave_0 = const()[name = string("op_319_interleave_0"), val = bool(false)];
196
+ tensor<fp16, [2, 16, 5, 64]> var_319_cast_fp16 = concat(axis = var_262, interleave = var_319_interleave_0, values = (var_317_cast_fp16, var_315_cast_fp16_0))[name = string("op_319_cast_fp16")];
197
+ tensor<fp16, [2, 16, 5, 64]> var_320_cast_fp16 = mul(x = var_319_cast_fp16, y = sin_to_fp16)[name = string("op_320_cast_fp16")];
198
+ tensor<fp16, [2, 16, 5, 64]> q_embed_3_cast_fp16 = add(x = var_314_cast_fp16, y = var_320_cast_fp16)[name = string("q_embed_3_cast_fp16")];
199
+ tensor<fp16, [2, 2, 5, 64]> k_9_cast_fp16 = transpose(perm = k_9_perm_0, x = var_307_cast_fp16)[name = string("transpose_11")];
200
+ tensor<fp16, [2, 2, 5, 64]> var_322_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_to_fp16)[name = string("op_322_cast_fp16")];
201
+ tensor<int32, [2]> var_323_split_sizes_0 = const()[name = string("op_323_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
202
+ int32 var_323_axis_0 = const()[name = string("op_323_axis_0"), val = int32(-1)];
203
+ tensor<fp16, [2, 2, 5, 32]> var_323_cast_fp16_0, tensor<fp16, [2, 2, 5, 32]> var_323_cast_fp16_1 = split(axis = var_323_axis_0, split_sizes = var_323_split_sizes_0, x = k_9_cast_fp16)[name = string("op_323_cast_fp16")];
204
+ fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)];
205
+ tensor<fp16, [2, 2, 5, 32]> var_325_cast_fp16 = mul(x = var_323_cast_fp16_1, y = const_18_promoted_to_fp16)[name = string("op_325_cast_fp16")];
206
+ bool var_327_interleave_0 = const()[name = string("op_327_interleave_0"), val = bool(false)];
207
+ tensor<fp16, [2, 2, 5, 64]> var_327_cast_fp16 = concat(axis = var_262, interleave = var_327_interleave_0, values = (var_325_cast_fp16, var_323_cast_fp16_0))[name = string("op_327_cast_fp16")];
208
+ tensor<fp16, [2, 2, 5, 64]> var_328_cast_fp16 = mul(x = var_327_cast_fp16, y = sin_to_fp16)[name = string("op_328_cast_fp16")];
209
+ tensor<fp16, [2, 2, 5, 64]> k_embed_3_cast_fp16 = add(x = var_322_cast_fp16, y = var_328_cast_fp16)[name = string("k_embed_3_cast_fp16")];
210
+ tensor<int32, [2]> var_335_split_sizes_0 = const()[name = string("op_335_split_sizes_0"), val = tensor<int32, [2]>([8, 8])];
211
+ int32 var_335_axis_0 = const()[name = string("op_335_axis_0"), val = int32(1)];
212
+ tensor<fp16, [2, 8, 5, 64]> var_335_cast_fp16_0, tensor<fp16, [2, 8, 5, 64]> var_335_cast_fp16_1 = split(axis = var_335_axis_0, split_sizes = var_335_split_sizes_0, x = q_embed_3_cast_fp16)[name = string("op_335_cast_fp16")];
213
+ tensor<int32, [2]> var_337_split_sizes_0 = const()[name = string("op_337_split_sizes_0"), val = tensor<int32, [2]>([1, 1])];
214
+ int32 var_337_axis_0 = const()[name = string("op_337_axis_0"), val = int32(1)];
215
+ tensor<fp16, [2, 1, 5, 64]> var_337_cast_fp16_0, tensor<fp16, [2, 1, 5, 64]> var_337_cast_fp16_1 = split(axis = var_337_axis_0, split_sizes = var_337_split_sizes_0, x = k_embed_3_cast_fp16)[name = string("op_337_cast_fp16")];
216
+ tensor<int32, [2]> var_339_split_sizes_0 = const()[name = string("op_339_split_sizes_0"), val = tensor<int32, [2]>([1, 1])];
217
+ int32 var_339_axis_0 = const()[name = string("op_339_axis_0"), val = int32(1)];
218
+ tensor<fp16, [2, 2, 5, 64]> value_states_9_cast_fp16 = transpose(perm = value_states_9_perm_0, x = var_310_cast_fp16)[name = string("transpose_10")];
219
+ tensor<fp16, [2, 1, 5, 64]> var_339_cast_fp16_0, tensor<fp16, [2, 1, 5, 64]> var_339_cast_fp16_1 = split(axis = var_339_axis_0, split_sizes = var_339_split_sizes_0, x = value_states_9_cast_fp16)[name = string("op_339_cast_fp16")];
220
+ tensor<int32, [4]> var_337_0_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_337_0_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
221
+ tensor<fp16, [2, 8, 5, 64]> var_337_0_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_337_0_broadcast_to_same_batch_dims_reps_0, x = var_337_cast_fp16_0)[name = string("op_337_0_broadcast_to_same_batch_dims_cast_fp16")];
222
+ tensor<int32, [4]> var_339_0_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_339_0_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
223
+ tensor<fp16, [2, 8, 5, 64]> var_339_0_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_339_0_broadcast_to_same_batch_dims_reps_0, x = var_339_cast_fp16_0)[name = string("op_339_0_broadcast_to_same_batch_dims_cast_fp16")];
224
+ tensor<fp16, [2, 8, 5, 64]> var_341_cast_fp16 = scaled_dot_product_attention(key = var_337_0_broadcast_to_same_batch_dims_cast_fp16, query = var_335_cast_fp16_0, value = var_339_0_broadcast_to_same_batch_dims_cast_fp16)[name = string("op_341_cast_fp16")];
225
+ tensor<int32, [4]> var_337_1_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_337_1_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
226
+ tensor<fp16, [2, 8, 5, 64]> var_337_1_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_337_1_broadcast_to_same_batch_dims_reps_0, x = var_337_cast_fp16_1)[name = string("op_337_1_broadcast_to_same_batch_dims_cast_fp16")];
227
+ tensor<int32, [4]> var_339_1_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_339_1_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
228
+ tensor<fp16, [2, 8, 5, 64]> var_339_1_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_339_1_broadcast_to_same_batch_dims_reps_0, x = var_339_cast_fp16_1)[name = string("op_339_1_broadcast_to_same_batch_dims_cast_fp16")];
229
+ tensor<fp16, [2, 8, 5, 64]> attn_output_7_cast_fp16 = scaled_dot_product_attention(key = var_337_1_broadcast_to_same_batch_dims_cast_fp16, query = var_335_cast_fp16_1, value = var_339_1_broadcast_to_same_batch_dims_cast_fp16)[name = string("attn_output_7_cast_fp16")];
230
+ bool attn_output_9_interleave_0 = const()[name = string("attn_output_9_interleave_0"), val = bool(false)];
231
+ tensor<fp16, [2, 16, 5, 64]> attn_output_9_cast_fp16 = concat(axis = var_257, interleave = attn_output_9_interleave_0, values = (var_341_cast_fp16, attn_output_7_cast_fp16))[name = string("attn_output_9_cast_fp16")];
232
+ tensor<int32, [4]> var_345_perm_0 = const()[name = string("op_345_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
233
+ tensor<int32, [3]> var_347 = const()[name = string("op_347"), val = tensor<int32, [3]>([2, 5, 1024])];
234
+ tensor<fp16, [2, 5, 16, 64]> var_345_cast_fp16 = transpose(perm = var_345_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_9")];
235
+ tensor<fp16, [2, 5, 1024]> input_25_cast_fp16 = reshape(shape = var_347, x = var_345_cast_fp16)[name = string("input_25_cast_fp16")];
236
+ tensor<fp16, [1024, 1024]> layer_decoder_layers_1_self_attn_layer_o_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_1_self_attn_layer_o_proj_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41196032)))];
237
+ tensor<fp16, [2, 5, 1024]> linear_16_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_1_self_attn_layer_o_proj_weight_to_fp16, x = input_25_cast_fp16)[name = string("linear_16_cast_fp16")];
238
+ tensor<fp16, [2, 5, 1024]> hidden_states_27_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_16_cast_fp16)[name = string("hidden_states_27_cast_fp16")];
239
+ fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)];
240
+ tensor<fp16, [2, 5, 1024]> var_354_cast_fp16 = mul(x = hidden_states_27_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_354_cast_fp16")];
241
+ bool hidden_states_29_interleave_0 = const()[name = string("hidden_states_29_interleave_0"), val = bool(false)];
242
+ tensor<fp16, [2, 5, 2048]> hidden_states_29_cast_fp16 = concat(axis = var_262, interleave = hidden_states_29_interleave_0, values = (hidden_states_27_cast_fp16, var_354_cast_fp16))[name = string("hidden_states_29_cast_fp16")];
243
+ tensor<int32, [1]> hidden_states_31_axes_0 = const()[name = string("hidden_states_31_axes_0"), val = tensor<int32, [1]>([-1])];
244
+ tensor<fp16, [2048]> weight_7_to_fp16 = const()[name = string("weight_7_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43293248)))];
245
+ tensor<fp16, [2, 5, 2048]> hidden_states_31_cast_fp16 = layer_norm(axes = hidden_states_31_axes_0, epsilon = var_265_to_fp16, gamma = weight_7_to_fp16, x = hidden_states_29_cast_fp16)[name = string("hidden_states_31_cast_fp16")];
246
+ tensor<int32, [2]> var_362_split_sizes_0 = const()[name = string("op_362_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
247
+ int32 var_362_axis_0 = const()[name = string("op_362_axis_0"), val = int32(-1)];
248
+ tensor<fp16, [2, 5, 1024]> var_362_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_362_cast_fp16_1 = split(axis = var_362_axis_0, split_sizes = var_362_split_sizes_0, x = hidden_states_31_cast_fp16)[name = string("op_362_cast_fp16")];
249
+ tensor<fp16, [4096, 1024]> layer_decoder_layers_1_mlp_gate_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_1_mlp_gate_proj_weight_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43297408)))];
250
+ tensor<fp16, [2, 5, 4096]> linear_17_cast_fp16 = linear(bias = linear_10_bias_0_to_fp16, weight = layer_decoder_layers_1_mlp_gate_proj_weight_to_fp16, x = var_362_cast_fp16_0)[name = string("linear_17_cast_fp16")];
251
+ tensor<fp16, [2, 5, 4096]> var_370_cast_fp16 = silu(x = linear_17_cast_fp16)[name = string("op_370_cast_fp16")];
252
+ tensor<fp16, [4096, 1024]> layer_decoder_layers_1_mlp_up_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_1_mlp_up_proj_weight_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51686080)))];
253
+ tensor<fp16, [2, 5, 4096]> linear_18_cast_fp16 = linear(bias = linear_10_bias_0_to_fp16, weight = layer_decoder_layers_1_mlp_up_proj_weight_to_fp16, x = var_362_cast_fp16_0)[name = string("linear_18_cast_fp16")];
254
+ tensor<fp16, [2, 5, 4096]> input_31_cast_fp16 = mul(x = var_370_cast_fp16, y = linear_18_cast_fp16)[name = string("input_31_cast_fp16")];
255
+ tensor<fp16, [1024, 4096]> layer_decoder_layers_1_mlp_down_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_1_mlp_down_proj_weight_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60074752)))];
256
+ tensor<fp16, [2, 5, 1024]> linear_19_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_1_mlp_down_proj_weight_to_fp16, x = input_31_cast_fp16)[name = string("linear_19_cast_fp16")];
257
+ tensor<fp16, [2, 5, 1024]> hidden_states_35_cast_fp16 = add(x = hidden_states_27_cast_fp16, y = linear_19_cast_fp16)[name = string("hidden_states_35_cast_fp16")];
258
+ int32 var_377 = const()[name = string("op_377"), val = int32(1)];
259
+ int32 var_382 = const()[name = string("op_382"), val = int32(-1)];
260
+ fp16 const_21_promoted_to_fp16 = const()[name = string("const_21_promoted_to_fp16"), val = fp16(-0x1p+0)];
261
+ tensor<fp16, [2, 5, 1024]> var_396_cast_fp16 = mul(x = hidden_states_35_cast_fp16, y = const_21_promoted_to_fp16)[name = string("op_396_cast_fp16")];
262
+ bool hidden_states_37_interleave_0 = const()[name = string("hidden_states_37_interleave_0"), val = bool(false)];
263
+ tensor<fp16, [2, 5, 2048]> hidden_states_37_cast_fp16 = concat(axis = var_382, interleave = hidden_states_37_interleave_0, values = (hidden_states_35_cast_fp16, var_396_cast_fp16))[name = string("hidden_states_37_cast_fp16")];
264
+ tensor<int32, [1]> hidden_states_39_axes_0 = const()[name = string("hidden_states_39_axes_0"), val = tensor<int32, [1]>([-1])];
265
+ tensor<fp16, [2048]> weight_9_to_fp16 = const()[name = string("weight_9_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68463424)))];
266
+ fp16 var_385_to_fp16 = const()[name = string("op_385_to_fp16"), val = fp16(0x1.5p-17)];
267
+ tensor<fp16, [2, 5, 2048]> hidden_states_39_cast_fp16 = layer_norm(axes = hidden_states_39_axes_0, epsilon = var_385_to_fp16, gamma = weight_9_to_fp16, x = hidden_states_37_cast_fp16)[name = string("hidden_states_39_cast_fp16")];
268
+ tensor<int32, [2]> var_404_split_sizes_0 = const()[name = string("op_404_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
269
+ int32 var_404_axis_0 = const()[name = string("op_404_axis_0"), val = int32(-1)];
270
+ tensor<fp16, [2, 5, 1024]> var_404_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_404_cast_fp16_1 = split(axis = var_404_axis_0, split_sizes = var_404_split_sizes_0, x = hidden_states_39_cast_fp16)[name = string("op_404_cast_fp16")];
271
+ tensor<fp16, [1024, 1024]> layer_decoder_layers_2_self_attn_layer_q_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_2_self_attn_layer_q_proj_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68467584)))];
272
+ tensor<fp16, [2, 5, 1024]> linear_20_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_2_self_attn_layer_q_proj_weight_to_fp16, x = var_404_cast_fp16_0)[name = string("linear_20_cast_fp16")];
273
+ tensor<fp16, [128, 1024]> layer_decoder_layers_2_self_attn_layer_k_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_2_self_attn_layer_k_proj_weight_to_fp16"), val = tensor<fp16, [128, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70564800)))];
274
+ tensor<fp16, [2, 5, 128]> linear_21_cast_fp16 = linear(bias = linear_7_bias_0_to_fp16, weight = layer_decoder_layers_2_self_attn_layer_k_proj_weight_to_fp16, x = var_404_cast_fp16_0)[name = string("linear_21_cast_fp16")];
275
+ tensor<fp16, [128, 1024]> layer_decoder_layers_2_self_attn_layer_v_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_2_self_attn_layer_v_proj_weight_to_fp16"), val = tensor<fp16, [128, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70827008)))];
276
+ tensor<fp16, [2, 5, 128]> linear_22_cast_fp16 = linear(bias = linear_7_bias_0_to_fp16, weight = layer_decoder_layers_2_self_attn_layer_v_proj_weight_to_fp16, x = var_404_cast_fp16_0)[name = string("linear_22_cast_fp16")];
277
+ tensor<int32, [4]> var_423 = const()[name = string("op_423"), val = tensor<int32, [4]>([2, 5, 16, 64])];
278
+ tensor<fp16, [2, 5, 16, 64]> var_424_cast_fp16 = reshape(shape = var_423, x = linear_20_cast_fp16)[name = string("op_424_cast_fp16")];
279
+ tensor<int32, [4]> q_17_perm_0 = const()[name = string("q_17_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
280
+ tensor<int32, [4]> var_426 = const()[name = string("op_426"), val = tensor<int32, [4]>([2, 5, 2, 64])];
281
+ tensor<fp16, [2, 5, 2, 64]> var_427_cast_fp16 = reshape(shape = var_426, x = linear_21_cast_fp16)[name = string("op_427_cast_fp16")];
282
+ tensor<int32, [4]> k_17_perm_0 = const()[name = string("k_17_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
283
+ tensor<int32, [4]> var_429 = const()[name = string("op_429"), val = tensor<int32, [4]>([2, 5, 2, 64])];
284
+ tensor<fp16, [2, 5, 2, 64]> var_430_cast_fp16 = reshape(shape = var_429, x = linear_22_cast_fp16)[name = string("op_430_cast_fp16")];
285
+ tensor<int32, [4]> value_states_15_perm_0 = const()[name = string("value_states_15_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
286
+ tensor<fp16, [2, 16, 5, 64]> q_17_cast_fp16 = transpose(perm = q_17_perm_0, x = var_424_cast_fp16)[name = string("transpose_8")];
287
+ tensor<fp16, [2, 16, 5, 64]> var_434_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_to_fp16)[name = string("op_434_cast_fp16")];
288
+ tensor<int32, [2]> var_435_split_sizes_0 = const()[name = string("op_435_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
289
+ int32 var_435_axis_0 = const()[name = string("op_435_axis_0"), val = int32(-1)];
290
+ tensor<fp16, [2, 16, 5, 32]> var_435_cast_fp16_0, tensor<fp16, [2, 16, 5, 32]> var_435_cast_fp16_1 = split(axis = var_435_axis_0, split_sizes = var_435_split_sizes_0, x = q_17_cast_fp16)[name = string("op_435_cast_fp16")];
291
+ fp16 const_25_promoted_to_fp16 = const()[name = string("const_25_promoted_to_fp16"), val = fp16(-0x1p+0)];
292
+ tensor<fp16, [2, 16, 5, 32]> var_437_cast_fp16 = mul(x = var_435_cast_fp16_1, y = const_25_promoted_to_fp16)[name = string("op_437_cast_fp16")];
293
+ bool var_439_interleave_0 = const()[name = string("op_439_interleave_0"), val = bool(false)];
294
+ tensor<fp16, [2, 16, 5, 64]> var_439_cast_fp16 = concat(axis = var_382, interleave = var_439_interleave_0, values = (var_437_cast_fp16, var_435_cast_fp16_0))[name = string("op_439_cast_fp16")];
295
+ tensor<fp16, [2, 16, 5, 64]> var_440_cast_fp16 = mul(x = var_439_cast_fp16, y = sin_to_fp16)[name = string("op_440_cast_fp16")];
296
+ tensor<fp16, [2, 16, 5, 64]> q_embed_5_cast_fp16 = add(x = var_434_cast_fp16, y = var_440_cast_fp16)[name = string("q_embed_5_cast_fp16")];
297
+ tensor<fp16, [2, 2, 5, 64]> k_17_cast_fp16 = transpose(perm = k_17_perm_0, x = var_427_cast_fp16)[name = string("transpose_7")];
298
+ tensor<fp16, [2, 2, 5, 64]> var_442_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_to_fp16)[name = string("op_442_cast_fp16")];
299
+ tensor<int32, [2]> var_443_split_sizes_0 = const()[name = string("op_443_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
300
+ int32 var_443_axis_0 = const()[name = string("op_443_axis_0"), val = int32(-1)];
301
+ tensor<fp16, [2, 2, 5, 32]> var_443_cast_fp16_0, tensor<fp16, [2, 2, 5, 32]> var_443_cast_fp16_1 = split(axis = var_443_axis_0, split_sizes = var_443_split_sizes_0, x = k_17_cast_fp16)[name = string("op_443_cast_fp16")];
302
+ fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)];
303
+ tensor<fp16, [2, 2, 5, 32]> var_445_cast_fp16 = mul(x = var_443_cast_fp16_1, y = const_26_promoted_to_fp16)[name = string("op_445_cast_fp16")];
304
+ bool var_447_interleave_0 = const()[name = string("op_447_interleave_0"), val = bool(false)];
305
+ tensor<fp16, [2, 2, 5, 64]> var_447_cast_fp16 = concat(axis = var_382, interleave = var_447_interleave_0, values = (var_445_cast_fp16, var_443_cast_fp16_0))[name = string("op_447_cast_fp16")];
306
+ tensor<fp16, [2, 2, 5, 64]> var_448_cast_fp16 = mul(x = var_447_cast_fp16, y = sin_to_fp16)[name = string("op_448_cast_fp16")];
307
+ tensor<fp16, [2, 2, 5, 64]> k_embed_5_cast_fp16 = add(x = var_442_cast_fp16, y = var_448_cast_fp16)[name = string("k_embed_5_cast_fp16")];
308
+ tensor<int32, [2]> var_455_split_sizes_0 = const()[name = string("op_455_split_sizes_0"), val = tensor<int32, [2]>([8, 8])];
309
+ int32 var_455_axis_0 = const()[name = string("op_455_axis_0"), val = int32(1)];
310
+ tensor<fp16, [2, 8, 5, 64]> var_455_cast_fp16_0, tensor<fp16, [2, 8, 5, 64]> var_455_cast_fp16_1 = split(axis = var_455_axis_0, split_sizes = var_455_split_sizes_0, x = q_embed_5_cast_fp16)[name = string("op_455_cast_fp16")];
311
+ tensor<int32, [2]> var_457_split_sizes_0 = const()[name = string("op_457_split_sizes_0"), val = tensor<int32, [2]>([1, 1])];
312
+ int32 var_457_axis_0 = const()[name = string("op_457_axis_0"), val = int32(1)];
313
+ tensor<fp16, [2, 1, 5, 64]> var_457_cast_fp16_0, tensor<fp16, [2, 1, 5, 64]> var_457_cast_fp16_1 = split(axis = var_457_axis_0, split_sizes = var_457_split_sizes_0, x = k_embed_5_cast_fp16)[name = string("op_457_cast_fp16")];
314
+ tensor<int32, [2]> var_459_split_sizes_0 = const()[name = string("op_459_split_sizes_0"), val = tensor<int32, [2]>([1, 1])];
315
+ int32 var_459_axis_0 = const()[name = string("op_459_axis_0"), val = int32(1)];
316
+ tensor<fp16, [2, 2, 5, 64]> value_states_15_cast_fp16 = transpose(perm = value_states_15_perm_0, x = var_430_cast_fp16)[name = string("transpose_6")];
317
+ tensor<fp16, [2, 1, 5, 64]> var_459_cast_fp16_0, tensor<fp16, [2, 1, 5, 64]> var_459_cast_fp16_1 = split(axis = var_459_axis_0, split_sizes = var_459_split_sizes_0, x = value_states_15_cast_fp16)[name = string("op_459_cast_fp16")];
318
+ tensor<int32, [4]> var_457_0_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_457_0_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
319
+ tensor<fp16, [2, 8, 5, 64]> var_457_0_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_457_0_broadcast_to_same_batch_dims_reps_0, x = var_457_cast_fp16_0)[name = string("op_457_0_broadcast_to_same_batch_dims_cast_fp16")];
320
+ tensor<int32, [4]> var_459_0_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_459_0_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
321
+ tensor<fp16, [2, 8, 5, 64]> var_459_0_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_459_0_broadcast_to_same_batch_dims_reps_0, x = var_459_cast_fp16_0)[name = string("op_459_0_broadcast_to_same_batch_dims_cast_fp16")];
322
+ tensor<fp16, [2, 8, 5, 64]> var_461_cast_fp16 = scaled_dot_product_attention(key = var_457_0_broadcast_to_same_batch_dims_cast_fp16, query = var_455_cast_fp16_0, value = var_459_0_broadcast_to_same_batch_dims_cast_fp16)[name = string("op_461_cast_fp16")];
323
+ tensor<int32, [4]> var_457_1_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_457_1_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
324
+ tensor<fp16, [2, 8, 5, 64]> var_457_1_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_457_1_broadcast_to_same_batch_dims_reps_0, x = var_457_cast_fp16_1)[name = string("op_457_1_broadcast_to_same_batch_dims_cast_fp16")];
325
+ tensor<int32, [4]> var_459_1_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_459_1_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
326
+ tensor<fp16, [2, 8, 5, 64]> var_459_1_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_459_1_broadcast_to_same_batch_dims_reps_0, x = var_459_cast_fp16_1)[name = string("op_459_1_broadcast_to_same_batch_dims_cast_fp16")];
327
+ tensor<fp16, [2, 8, 5, 64]> attn_output_13_cast_fp16 = scaled_dot_product_attention(key = var_457_1_broadcast_to_same_batch_dims_cast_fp16, query = var_455_cast_fp16_1, value = var_459_1_broadcast_to_same_batch_dims_cast_fp16)[name = string("attn_output_13_cast_fp16")];
328
+ bool attn_output_15_interleave_0 = const()[name = string("attn_output_15_interleave_0"), val = bool(false)];
329
+ tensor<fp16, [2, 16, 5, 64]> attn_output_15_cast_fp16 = concat(axis = var_377, interleave = attn_output_15_interleave_0, values = (var_461_cast_fp16, attn_output_13_cast_fp16))[name = string("attn_output_15_cast_fp16")];
330
+ tensor<int32, [4]> var_465_perm_0 = const()[name = string("op_465_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
331
+ tensor<int32, [3]> var_467 = const()[name = string("op_467"), val = tensor<int32, [3]>([2, 5, 1024])];
332
+ tensor<fp16, [2, 5, 16, 64]> var_465_cast_fp16 = transpose(perm = var_465_perm_0, x = attn_output_15_cast_fp16)[name = string("transpose_5")];
333
+ tensor<fp16, [2, 5, 1024]> input_33_cast_fp16 = reshape(shape = var_467, x = var_465_cast_fp16)[name = string("input_33_cast_fp16")];
334
+ tensor<fp16, [1024, 1024]> layer_decoder_layers_2_self_attn_layer_o_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_2_self_attn_layer_o_proj_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71089216)))];
335
+ tensor<fp16, [2, 5, 1024]> linear_23_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_2_self_attn_layer_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = string("linear_23_cast_fp16")];
336
+ tensor<fp16, [2, 5, 1024]> hidden_states_45_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = linear_23_cast_fp16)[name = string("hidden_states_45_cast_fp16")];
337
+ fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)];
338
+ tensor<fp16, [2, 5, 1024]> var_474_cast_fp16 = mul(x = hidden_states_45_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_474_cast_fp16")];
339
+ bool hidden_states_47_interleave_0 = const()[name = string("hidden_states_47_interleave_0"), val = bool(false)];
340
+ tensor<fp16, [2, 5, 2048]> hidden_states_47_cast_fp16 = concat(axis = var_382, interleave = hidden_states_47_interleave_0, values = (hidden_states_45_cast_fp16, var_474_cast_fp16))[name = string("hidden_states_47_cast_fp16")];
341
+ tensor<int32, [1]> hidden_states_49_axes_0 = const()[name = string("hidden_states_49_axes_0"), val = tensor<int32, [1]>([-1])];
342
+ tensor<fp16, [2048]> weight_11_to_fp16 = const()[name = string("weight_11_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73186432)))];
343
+ tensor<fp16, [2, 5, 2048]> hidden_states_49_cast_fp16 = layer_norm(axes = hidden_states_49_axes_0, epsilon = var_385_to_fp16, gamma = weight_11_to_fp16, x = hidden_states_47_cast_fp16)[name = string("hidden_states_49_cast_fp16")];
344
+ tensor<int32, [2]> var_482_split_sizes_0 = const()[name = string("op_482_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
345
+ int32 var_482_axis_0 = const()[name = string("op_482_axis_0"), val = int32(-1)];
346
+ tensor<fp16, [2, 5, 1024]> var_482_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_482_cast_fp16_1 = split(axis = var_482_axis_0, split_sizes = var_482_split_sizes_0, x = hidden_states_49_cast_fp16)[name = string("op_482_cast_fp16")];
347
+ tensor<fp16, [4096, 1024]> layer_decoder_layers_2_mlp_gate_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_2_mlp_gate_proj_weight_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73190592)))];
348
+ tensor<fp16, [2, 5, 4096]> linear_24_cast_fp16 = linear(bias = linear_10_bias_0_to_fp16, weight = layer_decoder_layers_2_mlp_gate_proj_weight_to_fp16, x = var_482_cast_fp16_0)[name = string("linear_24_cast_fp16")];
349
+ tensor<fp16, [2, 5, 4096]> var_490_cast_fp16 = silu(x = linear_24_cast_fp16)[name = string("op_490_cast_fp16")];
350
+ tensor<fp16, [4096, 1024]> layer_decoder_layers_2_mlp_up_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_2_mlp_up_proj_weight_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81579264)))];
351
+ tensor<fp16, [2, 5, 4096]> linear_25_cast_fp16 = linear(bias = linear_10_bias_0_to_fp16, weight = layer_decoder_layers_2_mlp_up_proj_weight_to_fp16, x = var_482_cast_fp16_0)[name = string("linear_25_cast_fp16")];
352
+ tensor<fp16, [2, 5, 4096]> input_39_cast_fp16 = mul(x = var_490_cast_fp16, y = linear_25_cast_fp16)[name = string("input_39_cast_fp16")];
353
+ tensor<fp16, [1024, 4096]> layer_decoder_layers_2_mlp_down_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_2_mlp_down_proj_weight_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89967936)))];
354
+ tensor<fp16, [2, 5, 1024]> linear_26_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_2_mlp_down_proj_weight_to_fp16, x = input_39_cast_fp16)[name = string("linear_26_cast_fp16")];
355
+ tensor<fp16, [2, 5, 1024]> hidden_states_53_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = linear_26_cast_fp16)[name = string("hidden_states_53_cast_fp16")];
356
+ int32 var_497 = const()[name = string("op_497"), val = int32(1)];
357
+ int32 var_502 = const()[name = string("op_502"), val = int32(-1)];
358
+ fp16 const_29_promoted_to_fp16 = const()[name = string("const_29_promoted_to_fp16"), val = fp16(-0x1p+0)];
359
+ tensor<fp16, [2, 5, 1024]> var_516_cast_fp16 = mul(x = hidden_states_53_cast_fp16, y = const_29_promoted_to_fp16)[name = string("op_516_cast_fp16")];
360
+ bool hidden_states_55_interleave_0 = const()[name = string("hidden_states_55_interleave_0"), val = bool(false)];
361
+ tensor<fp16, [2, 5, 2048]> hidden_states_55_cast_fp16 = concat(axis = var_502, interleave = hidden_states_55_interleave_0, values = (hidden_states_53_cast_fp16, var_516_cast_fp16))[name = string("hidden_states_55_cast_fp16")];
362
+ tensor<int32, [1]> hidden_states_57_axes_0 = const()[name = string("hidden_states_57_axes_0"), val = tensor<int32, [1]>([-1])];
363
+ tensor<fp16, [2048]> weight_13_to_fp16 = const()[name = string("weight_13_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98356608)))];
364
+ fp16 var_505_to_fp16 = const()[name = string("op_505_to_fp16"), val = fp16(0x1.5p-17)];
365
+ tensor<fp16, [2, 5, 2048]> hidden_states_57_cast_fp16 = layer_norm(axes = hidden_states_57_axes_0, epsilon = var_505_to_fp16, gamma = weight_13_to_fp16, x = hidden_states_55_cast_fp16)[name = string("hidden_states_57_cast_fp16")];
366
+ tensor<int32, [2]> var_524_split_sizes_0 = const()[name = string("op_524_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
367
+ int32 var_524_axis_0 = const()[name = string("op_524_axis_0"), val = int32(-1)];
368
+ tensor<fp16, [2, 5, 1024]> var_524_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_524_cast_fp16_1 = split(axis = var_524_axis_0, split_sizes = var_524_split_sizes_0, x = hidden_states_57_cast_fp16)[name = string("op_524_cast_fp16")];
369
+ tensor<fp16, [1024, 1024]> layer_decoder_layers_3_self_attn_layer_q_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_3_self_attn_layer_q_proj_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98360768)))];
370
+ tensor<fp16, [2, 5, 1024]> linear_27_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_3_self_attn_layer_q_proj_weight_to_fp16, x = var_524_cast_fp16_0)[name = string("linear_27_cast_fp16")];
371
+ tensor<fp16, [128, 1024]> layer_decoder_layers_3_self_attn_layer_k_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_3_self_attn_layer_k_proj_weight_to_fp16"), val = tensor<fp16, [128, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100457984)))];
372
+ tensor<fp16, [2, 5, 128]> linear_28_cast_fp16 = linear(bias = linear_7_bias_0_to_fp16, weight = layer_decoder_layers_3_self_attn_layer_k_proj_weight_to_fp16, x = var_524_cast_fp16_0)[name = string("linear_28_cast_fp16")];
373
+ tensor<fp16, [128, 1024]> layer_decoder_layers_3_self_attn_layer_v_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_3_self_attn_layer_v_proj_weight_to_fp16"), val = tensor<fp16, [128, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100720192)))];
374
+ tensor<fp16, [2, 5, 128]> linear_29_cast_fp16 = linear(bias = linear_7_bias_0_to_fp16, weight = layer_decoder_layers_3_self_attn_layer_v_proj_weight_to_fp16, x = var_524_cast_fp16_0)[name = string("linear_29_cast_fp16")];
375
+ tensor<int32, [4]> var_543 = const()[name = string("op_543"), val = tensor<int32, [4]>([2, 5, 16, 64])];
376
+ tensor<fp16, [2, 5, 16, 64]> var_544_cast_fp16 = reshape(shape = var_543, x = linear_27_cast_fp16)[name = string("op_544_cast_fp16")];
377
+ tensor<int32, [4]> q_25_perm_0 = const()[name = string("q_25_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
378
+ tensor<int32, [4]> var_546 = const()[name = string("op_546"), val = tensor<int32, [4]>([2, 5, 2, 64])];
379
+ tensor<fp16, [2, 5, 2, 64]> var_547_cast_fp16 = reshape(shape = var_546, x = linear_28_cast_fp16)[name = string("op_547_cast_fp16")];
380
+ tensor<int32, [4]> k_25_perm_0 = const()[name = string("k_25_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
381
+ tensor<int32, [4]> var_549 = const()[name = string("op_549"), val = tensor<int32, [4]>([2, 5, 2, 64])];
382
+ tensor<fp16, [2, 5, 2, 64]> var_550_cast_fp16 = reshape(shape = var_549, x = linear_29_cast_fp16)[name = string("op_550_cast_fp16")];
383
+ tensor<int32, [4]> value_states_21_perm_0 = const()[name = string("value_states_21_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
384
+ tensor<fp16, [2, 16, 5, 64]> q_25_cast_fp16 = transpose(perm = q_25_perm_0, x = var_544_cast_fp16)[name = string("transpose_4")];
385
+ tensor<fp16, [2, 16, 5, 64]> var_554_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_to_fp16)[name = string("op_554_cast_fp16")];
386
+ tensor<int32, [2]> var_555_split_sizes_0 = const()[name = string("op_555_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
387
+ int32 var_555_axis_0 = const()[name = string("op_555_axis_0"), val = int32(-1)];
388
+ tensor<fp16, [2, 16, 5, 32]> var_555_cast_fp16_0, tensor<fp16, [2, 16, 5, 32]> var_555_cast_fp16_1 = split(axis = var_555_axis_0, split_sizes = var_555_split_sizes_0, x = q_25_cast_fp16)[name = string("op_555_cast_fp16")];
389
+ fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)];
390
+ tensor<fp16, [2, 16, 5, 32]> var_557_cast_fp16 = mul(x = var_555_cast_fp16_1, y = const_33_promoted_to_fp16)[name = string("op_557_cast_fp16")];
391
+ bool var_559_interleave_0 = const()[name = string("op_559_interleave_0"), val = bool(false)];
392
+ tensor<fp16, [2, 16, 5, 64]> var_559_cast_fp16 = concat(axis = var_502, interleave = var_559_interleave_0, values = (var_557_cast_fp16, var_555_cast_fp16_0))[name = string("op_559_cast_fp16")];
393
+ tensor<fp16, [2, 16, 5, 64]> var_560_cast_fp16 = mul(x = var_559_cast_fp16, y = sin_to_fp16)[name = string("op_560_cast_fp16")];
394
+ tensor<fp16, [2, 16, 5, 64]> q_embed_cast_fp16 = add(x = var_554_cast_fp16, y = var_560_cast_fp16)[name = string("q_embed_cast_fp16")];
395
+ tensor<fp16, [2, 2, 5, 64]> k_25_cast_fp16 = transpose(perm = k_25_perm_0, x = var_547_cast_fp16)[name = string("transpose_3")];
396
+ tensor<fp16, [2, 2, 5, 64]> var_562_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_to_fp16)[name = string("op_562_cast_fp16")];
397
+ tensor<int32, [2]> var_563_split_sizes_0 = const()[name = string("op_563_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
398
+ int32 var_563_axis_0 = const()[name = string("op_563_axis_0"), val = int32(-1)];
399
+ tensor<fp16, [2, 2, 5, 32]> var_563_cast_fp16_0, tensor<fp16, [2, 2, 5, 32]> var_563_cast_fp16_1 = split(axis = var_563_axis_0, split_sizes = var_563_split_sizes_0, x = k_25_cast_fp16)[name = string("op_563_cast_fp16")];
400
+ fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)];
401
+ tensor<fp16, [2, 2, 5, 32]> var_565_cast_fp16 = mul(x = var_563_cast_fp16_1, y = const_34_promoted_to_fp16)[name = string("op_565_cast_fp16")];
402
+ bool var_567_interleave_0 = const()[name = string("op_567_interleave_0"), val = bool(false)];
403
+ tensor<fp16, [2, 2, 5, 64]> var_567_cast_fp16 = concat(axis = var_502, interleave = var_567_interleave_0, values = (var_565_cast_fp16, var_563_cast_fp16_0))[name = string("op_567_cast_fp16")];
404
+ tensor<fp16, [2, 2, 5, 64]> var_568_cast_fp16 = mul(x = var_567_cast_fp16, y = sin_to_fp16)[name = string("op_568_cast_fp16")];
405
+ tensor<fp16, [2, 2, 5, 64]> k_embed_cast_fp16 = add(x = var_562_cast_fp16, y = var_568_cast_fp16)[name = string("k_embed_cast_fp16")];
406
+ tensor<int32, [2]> var_575_split_sizes_0 = const()[name = string("op_575_split_sizes_0"), val = tensor<int32, [2]>([8, 8])];
407
+ int32 var_575_axis_0 = const()[name = string("op_575_axis_0"), val = int32(1)];
408
+ tensor<fp16, [2, 8, 5, 64]> var_575_cast_fp16_0, tensor<fp16, [2, 8, 5, 64]> var_575_cast_fp16_1 = split(axis = var_575_axis_0, split_sizes = var_575_split_sizes_0, x = q_embed_cast_fp16)[name = string("op_575_cast_fp16")];
409
+ tensor<int32, [2]> var_577_split_sizes_0 = const()[name = string("op_577_split_sizes_0"), val = tensor<int32, [2]>([1, 1])];
410
+ int32 var_577_axis_0 = const()[name = string("op_577_axis_0"), val = int32(1)];
411
+ tensor<fp16, [2, 1, 5, 64]> var_577_cast_fp16_0, tensor<fp16, [2, 1, 5, 64]> var_577_cast_fp16_1 = split(axis = var_577_axis_0, split_sizes = var_577_split_sizes_0, x = k_embed_cast_fp16)[name = string("op_577_cast_fp16")];
412
+ tensor<int32, [2]> var_579_split_sizes_0 = const()[name = string("op_579_split_sizes_0"), val = tensor<int32, [2]>([1, 1])];
413
+ int32 var_579_axis_0 = const()[name = string("op_579_axis_0"), val = int32(1)];
414
+ tensor<fp16, [2, 2, 5, 64]> value_states_21_cast_fp16 = transpose(perm = value_states_21_perm_0, x = var_550_cast_fp16)[name = string("transpose_2")];
415
+ tensor<fp16, [2, 1, 5, 64]> var_579_cast_fp16_0, tensor<fp16, [2, 1, 5, 64]> var_579_cast_fp16_1 = split(axis = var_579_axis_0, split_sizes = var_579_split_sizes_0, x = value_states_21_cast_fp16)[name = string("op_579_cast_fp16")];
416
+ tensor<int32, [4]> var_577_0_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_577_0_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
417
+ tensor<fp16, [2, 8, 5, 64]> var_577_0_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_577_0_broadcast_to_same_batch_dims_reps_0, x = var_577_cast_fp16_0)[name = string("op_577_0_broadcast_to_same_batch_dims_cast_fp16")];
418
+ tensor<int32, [4]> var_579_0_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_579_0_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
419
+ tensor<fp16, [2, 8, 5, 64]> var_579_0_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_579_0_broadcast_to_same_batch_dims_reps_0, x = var_579_cast_fp16_0)[name = string("op_579_0_broadcast_to_same_batch_dims_cast_fp16")];
420
+ tensor<fp16, [2, 8, 5, 64]> var_581_cast_fp16 = scaled_dot_product_attention(key = var_577_0_broadcast_to_same_batch_dims_cast_fp16, query = var_575_cast_fp16_0, value = var_579_0_broadcast_to_same_batch_dims_cast_fp16)[name = string("op_581_cast_fp16")];
421
+ tensor<int32, [4]> var_577_1_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_577_1_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
422
+ tensor<fp16, [2, 8, 5, 64]> var_577_1_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_577_1_broadcast_to_same_batch_dims_reps_0, x = var_577_cast_fp16_1)[name = string("op_577_1_broadcast_to_same_batch_dims_cast_fp16")];
423
+ tensor<int32, [4]> var_579_1_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_579_1_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
424
+ tensor<fp16, [2, 8, 5, 64]> var_579_1_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_579_1_broadcast_to_same_batch_dims_reps_0, x = var_579_cast_fp16_1)[name = string("op_579_1_broadcast_to_same_batch_dims_cast_fp16")];
425
+ tensor<fp16, [2, 8, 5, 64]> attn_output_19_cast_fp16 = scaled_dot_product_attention(key = var_577_1_broadcast_to_same_batch_dims_cast_fp16, query = var_575_cast_fp16_1, value = var_579_1_broadcast_to_same_batch_dims_cast_fp16)[name = string("attn_output_19_cast_fp16")];
426
+ bool attn_output_21_interleave_0 = const()[name = string("attn_output_21_interleave_0"), val = bool(false)];
427
+ tensor<fp16, [2, 16, 5, 64]> attn_output_21_cast_fp16 = concat(axis = var_497, interleave = attn_output_21_interleave_0, values = (var_581_cast_fp16, attn_output_19_cast_fp16))[name = string("attn_output_21_cast_fp16")];
428
+ tensor<int32, [4]> var_585_perm_0 = const()[name = string("op_585_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
429
+ tensor<int32, [3]> var_587 = const()[name = string("op_587"), val = tensor<int32, [3]>([2, 5, 1024])];
430
+ tensor<fp16, [2, 5, 16, 64]> var_585_cast_fp16 = transpose(perm = var_585_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_1")];
431
+ tensor<fp16, [2, 5, 1024]> input_41_cast_fp16 = reshape(shape = var_587, x = var_585_cast_fp16)[name = string("input_41_cast_fp16")];
432
+ tensor<fp16, [1024, 1024]> layer_decoder_layers_3_self_attn_layer_o_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_3_self_attn_layer_o_proj_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100982400)))];
433
+ tensor<fp16, [2, 5, 1024]> linear_30_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_3_self_attn_layer_o_proj_weight_to_fp16, x = input_41_cast_fp16)[name = string("linear_30_cast_fp16")];
434
+ tensor<fp16, [2, 5, 1024]> hidden_states_63_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = linear_30_cast_fp16)[name = string("hidden_states_63_cast_fp16")];
435
+ fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)];
436
+ tensor<fp16, [2, 5, 1024]> var_594_cast_fp16 = mul(x = hidden_states_63_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_594_cast_fp16")];
437
+ bool hidden_states_65_interleave_0 = const()[name = string("hidden_states_65_interleave_0"), val = bool(false)];
438
+ tensor<fp16, [2, 5, 2048]> hidden_states_65_cast_fp16 = concat(axis = var_502, interleave = hidden_states_65_interleave_0, values = (hidden_states_63_cast_fp16, var_594_cast_fp16))[name = string("hidden_states_65_cast_fp16")];
439
+ tensor<int32, [1]> hidden_states_67_axes_0 = const()[name = string("hidden_states_67_axes_0"), val = tensor<int32, [1]>([-1])];
440
+ tensor<fp16, [2048]> weight_15_to_fp16 = const()[name = string("weight_15_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103079616)))];
441
+ tensor<fp16, [2, 5, 2048]> hidden_states_67_cast_fp16 = layer_norm(axes = hidden_states_67_axes_0, epsilon = var_505_to_fp16, gamma = weight_15_to_fp16, x = hidden_states_65_cast_fp16)[name = string("hidden_states_67_cast_fp16")];
442
+ tensor<int32, [2]> var_602_split_sizes_0 = const()[name = string("op_602_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
443
+ int32 var_602_axis_0 = const()[name = string("op_602_axis_0"), val = int32(-1)];
444
+ tensor<fp16, [2, 5, 1024]> var_602_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_602_cast_fp16_1 = split(axis = var_602_axis_0, split_sizes = var_602_split_sizes_0, x = hidden_states_67_cast_fp16)[name = string("op_602_cast_fp16")];
445
+ tensor<fp16, [4096, 1024]> layer_decoder_layers_3_mlp_gate_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_3_mlp_gate_proj_weight_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103083776)))];
446
+ tensor<fp16, [2, 5, 4096]> linear_31_cast_fp16 = linear(bias = linear_10_bias_0_to_fp16, weight = layer_decoder_layers_3_mlp_gate_proj_weight_to_fp16, x = var_602_cast_fp16_0)[name = string("linear_31_cast_fp16")];
447
+ tensor<fp16, [2, 5, 4096]> var_610_cast_fp16 = silu(x = linear_31_cast_fp16)[name = string("op_610_cast_fp16")];
448
+ tensor<fp16, [4096, 1024]> layer_decoder_layers_3_mlp_up_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_3_mlp_up_proj_weight_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111472448)))];
449
+ tensor<fp16, [2, 5, 4096]> linear_32_cast_fp16 = linear(bias = linear_10_bias_0_to_fp16, weight = layer_decoder_layers_3_mlp_up_proj_weight_to_fp16, x = var_602_cast_fp16_0)[name = string("linear_32_cast_fp16")];
450
+ tensor<fp16, [2, 5, 4096]> input_47_cast_fp16 = mul(x = var_610_cast_fp16, y = linear_32_cast_fp16)[name = string("input_47_cast_fp16")];
451
+ tensor<fp16, [1024, 4096]> layer_decoder_layers_3_mlp_down_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_3_mlp_down_proj_weight_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119861120)))];
452
+ tensor<fp16, [2, 5, 1024]> linear_33_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_3_mlp_down_proj_weight_to_fp16, x = input_47_cast_fp16)[name = string("linear_33_cast_fp16")];
453
+ tensor<fp16, [2, 5, 1024]> hidden_states_71_cast_fp16 = add(x = hidden_states_63_cast_fp16, y = linear_33_cast_fp16)[name = string("hidden_states_71_cast_fp16")];
454
+ int32 var_623 = const()[name = string("op_623"), val = int32(-1)];
455
+ fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)];
456
+ tensor<fp16, [2, 5, 1024]> var_626_cast_fp16 = mul(x = hidden_states_71_cast_fp16, y = const_37_promoted_to_fp16)[name = string("op_626_cast_fp16")];
457
+ bool hidden_states_73_interleave_0 = const()[name = string("hidden_states_73_interleave_0"), val = bool(false)];
458
+ tensor<fp16, [2, 5, 2048]> hidden_states_73_cast_fp16 = concat(axis = var_623, interleave = hidden_states_73_interleave_0, values = (hidden_states_71_cast_fp16, var_626_cast_fp16))[name = string("hidden_states_73_cast_fp16")];
459
+ tensor<int32, [1]> hidden_states_75_axes_0 = const()[name = string("hidden_states_75_axes_0"), val = tensor<int32, [1]>([-1])];
460
+ tensor<fp16, [2048]> weight_17_to_fp16 = const()[name = string("weight_17_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128249792)))];
461
+ fp16 var_620_to_fp16 = const()[name = string("op_620_to_fp16"), val = fp16(0x1.5p-17)];
462
+ tensor<fp16, [2, 5, 2048]> hidden_states_75_cast_fp16 = layer_norm(axes = hidden_states_75_axes_0, epsilon = var_620_to_fp16, gamma = weight_17_to_fp16, x = hidden_states_73_cast_fp16)[name = string("hidden_states_75_cast_fp16")];
463
+ tensor<int32, [2]> var_634_split_sizes_0 = const()[name = string("op_634_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
464
+ int32 var_634_axis_0 = const()[name = string("op_634_axis_0"), val = int32(-1)];
465
+ tensor<fp16, [2, 5, 1024]> var_634_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_634_cast_fp16_1 = split(axis = var_634_axis_0, split_sizes = var_634_split_sizes_0, x = hidden_states_75_cast_fp16)[name = string("op_634_cast_fp16")];
466
+ tensor<int32, [3]> var_652_begin_0 = const()[name = string("op_652_begin_0"), val = tensor<int32, [3]>([0, 3, 0])];
467
+ tensor<int32, [3]> var_652_end_0 = const()[name = string("op_652_end_0"), val = tensor<int32, [3]>([2, 5, 1024])];
468
+ tensor<bool, [3]> var_652_end_mask_0 = const()[name = string("op_652_end_mask_0"), val = tensor<bool, [3]>([true, true, true])];
469
+ tensor<fp16, [2, 2, 1024]> var_652_cast_fp16 = slice_by_index(begin = var_652_begin_0, end = var_652_end_0, end_mask = var_652_end_mask_0, x = var_634_cast_fp16_0)[name = string("op_652_cast_fp16")];
470
+ tensor<fp16, [64, 1024]> layer_out_proj_weight_to_fp16 = const()[name = string("layer_out_proj_weight_to_fp16"), val = tensor<fp16, [64, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128253952)))];
471
+ tensor<fp16, [64]> layer_out_proj_bias_to_fp16 = const()[name = string("layer_out_proj_bias_to_fp16"), val = tensor<fp16, [64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128385088)))];
472
+ tensor<fp16, [2, 2, 64]> linear_34_cast_fp16 = linear(bias = layer_out_proj_bias_to_fp16, weight = layer_out_proj_weight_to_fp16, x = var_652_cast_fp16)[name = string("linear_34_cast_fp16")];
473
+ tensor<int32, [3]> var_663_perm_0 = const()[name = string("op_663_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
474
+ tensor<fp16, [2, 64, 2]> output = transpose(perm = var_663_perm_0, x = linear_34_cast_fp16)[name = string("transpose_0")];
475
+ } -> (output);
476
+ }
locdit_f16.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb2a8cbcb47f04d55895d79276eeaf6420517a39cd0b1dc7d018de1b06b21c48
3
+ size 128385280