Upload folder using huggingface_hub
locdit_f16.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad769b50fccd64c66cff88a33a9885a5c00dab08ca80ebb9c7ddba3e6590ad81
+size 243

locdit_f16.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a4db82642a3ccac7e9163ea22bc326306cfd8dd6e204d5f9c7db8322a1a0603
+size 441

locdit_f16.mlmodelc/model.mil ADDED
@@ -0,0 +1,476 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3405.2.1"}, {"coremlc-version", "3405.2.1"}, {"coremltools-component-torch", "2.8.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0b1"}})]
+{
+    func main<ios18>(tensor<fp16, [2, 64, 2]> cond, tensor<fp16, [2]> dt, tensor<fp16, [2, 1024]> mu, tensor<fp16, [2]> t, tensor<fp16, [2, 64, 2]> x) {
+            int32 var_38 = const()[name = string("op_38"), val = int32(-1)];
+            tensor<int32, [1]> var_50_axes_0 = const()[name = string("op_50_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [2, 1]> var_50_cast_fp16 = expand_dims(axes = var_50_axes_0, x = t)[name = string("op_50_cast_fp16")];
+            fp16 var_51_promoted_to_fp16 = const()[name = string("op_51_promoted_to_fp16"), val = fp16(0x1.f4p+9)];
+            tensor<fp16, [2, 1]> var_52_cast_fp16 = mul(x = var_50_cast_fp16, y = var_51_promoted_to_fp16)[name = string("op_52_cast_fp16")];
+            tensor<fp16, [1, 512]> var_53_to_fp16 = const()[name = string("op_53_to_fp16"), val = tensor<fp16, [1, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [2, 512]> emb_3_cast_fp16 = mul(x = var_52_cast_fp16, y = var_53_to_fp16)[name = string("emb_3_cast_fp16")];
+            tensor<fp16, [2, 512]> var_55_cast_fp16 = sin(x = emb_3_cast_fp16)[name = string("op_55_cast_fp16")];
+            tensor<fp16, [2, 512]> var_56_cast_fp16 = cos(x = emb_3_cast_fp16)[name = string("op_56_cast_fp16")];
+            bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)];
+            tensor<fp16, [2, 1024]> input_1_cast_fp16 = concat(axis = var_38, interleave = input_1_interleave_0, values = (var_55_cast_fp16, var_56_cast_fp16))[name = string("input_1_cast_fp16")];
+            int32 var_59 = const()[name = string("op_59"), val = int32(-1)];
+            tensor<int32, [1]> var_71_axes_0 = const()[name = string("op_71_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [2, 1]> var_71_cast_fp16 = expand_dims(axes = var_71_axes_0, x = dt)[name = string("op_71_cast_fp16")];
+            fp16 var_72_promoted_to_fp16 = const()[name = string("op_72_promoted_to_fp16"), val = fp16(0x1.f4p+9)];
+            tensor<fp16, [2, 1]> var_73_cast_fp16 = mul(x = var_71_cast_fp16, y = var_72_promoted_to_fp16)[name = string("op_73_cast_fp16")];
+            tensor<fp16, [2, 512]> emb_cast_fp16 = mul(x = var_73_cast_fp16, y = var_53_to_fp16)[name = string("emb_cast_fp16")];
+            tensor<fp16, [2, 512]> var_76_cast_fp16 = sin(x = emb_cast_fp16)[name = string("op_76_cast_fp16")];
+            tensor<fp16, [2, 512]> var_77_cast_fp16 = cos(x = emb_cast_fp16)[name = string("op_77_cast_fp16")];
+            bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)];
+            tensor<fp16, [2, 1024]> input_7_cast_fp16 = concat(axis = var_59, interleave = input_7_interleave_0, values = (var_76_cast_fp16, var_77_cast_fp16))[name = string("input_7_cast_fp16")];
+            tensor<fp16, [1024, 1024]> layer_time_mlp_linear_1_weight_to_fp16 = const()[name = string("layer_time_mlp_linear_1_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1152)))];
+            tensor<fp16, [1024]> layer_time_mlp_linear_1_bias_to_fp16 = const()[name = string("layer_time_mlp_linear_1_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2098368)))];
+            tensor<fp16, [2, 1024]> linear_0_cast_fp16 = linear(bias = layer_time_mlp_linear_1_bias_to_fp16, weight = layer_time_mlp_linear_1_weight_to_fp16, x = input_1_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [2, 1024]> input_5_cast_fp16 = silu(x = linear_0_cast_fp16)[name = string("input_5_cast_fp16")];
+            tensor<fp16, [1024, 1024]> layer_time_mlp_linear_2_weight_to_fp16 = const()[name = string("layer_time_mlp_linear_2_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2100480)))];
+            tensor<fp16, [1024]> layer_time_mlp_linear_2_bias_to_fp16 = const()[name = string("layer_time_mlp_linear_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4197696)))];
+            tensor<fp16, [2, 1024]> linear_1_cast_fp16 = linear(bias = layer_time_mlp_linear_2_bias_to_fp16, weight = layer_time_mlp_linear_2_weight_to_fp16, x = input_5_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<fp16, [1024, 1024]> layer_delta_time_mlp_linear_1_weight_to_fp16 = const()[name = string("layer_delta_time_mlp_linear_1_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4199808)))];
+            tensor<fp16, [1024]> layer_delta_time_mlp_linear_1_bias_to_fp16 = const()[name = string("layer_delta_time_mlp_linear_1_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6297024)))];
+            tensor<fp16, [2, 1024]> linear_2_cast_fp16 = linear(bias = layer_delta_time_mlp_linear_1_bias_to_fp16, weight = layer_delta_time_mlp_linear_1_weight_to_fp16, x = input_7_cast_fp16)[name = string("linear_2_cast_fp16")];
+            tensor<fp16, [2, 1024]> input_11_cast_fp16 = silu(x = linear_2_cast_fp16)[name = string("input_11_cast_fp16")];
+            tensor<fp16, [1024, 1024]> layer_delta_time_mlp_linear_2_weight_to_fp16 = const()[name = string("layer_delta_time_mlp_linear_2_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6299136)))];
+            tensor<fp16, [1024]> layer_delta_time_mlp_linear_2_bias_to_fp16 = const()[name = string("layer_delta_time_mlp_linear_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8396352)))];
+            tensor<fp16, [2, 1024]> linear_3_cast_fp16 = linear(bias = layer_delta_time_mlp_linear_2_bias_to_fp16, weight = layer_delta_time_mlp_linear_2_weight_to_fp16, x = input_11_cast_fp16)[name = string("linear_3_cast_fp16")];
+            tensor<fp16, [2, 1024]> t_cast_fp16 = add(x = linear_1_cast_fp16, y = linear_3_cast_fp16)[name = string("t_cast_fp16")];
+            tensor<int32, [3]> var_119 = const()[name = string("op_119"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<fp16, [1024, 64]> layer_in_proj_weight_to_fp16 = const()[name = string("layer_in_proj_weight_to_fp16"), val = tensor<fp16, [1024, 64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8398464)))];
+            tensor<fp16, [1024]> layer_in_proj_bias_to_fp16 = const()[name = string("layer_in_proj_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8529600)))];
+            tensor<fp16, [2, 2, 64]> input_13_cast_fp16 = transpose(perm = var_119, x = x)[name = string("transpose_18")];
+            tensor<fp16, [2, 2, 1024]> linear_4_cast_fp16 = linear(bias = layer_in_proj_bias_to_fp16, weight = layer_in_proj_weight_to_fp16, x = input_13_cast_fp16)[name = string("linear_4_cast_fp16")];
+            tensor<int32, [3]> input_15_perm_0 = const()[name = string("input_15_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<fp16, [1024, 64]> layer_cond_proj_weight_to_fp16 = const()[name = string("layer_cond_proj_weight_to_fp16"), val = tensor<fp16, [1024, 64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8531712)))];
+            tensor<fp16, [1024]> layer_cond_proj_bias_to_fp16 = const()[name = string("layer_cond_proj_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8662848)))];
+            tensor<fp16, [2, 2, 64]> input_15_cast_fp16 = transpose(perm = input_15_perm_0, x = cond)[name = string("transpose_17")];
+            tensor<fp16, [2, 2, 1024]> linear_5_cast_fp16 = linear(bias = layer_cond_proj_bias_to_fp16, weight = layer_cond_proj_weight_to_fp16, x = input_15_cast_fp16)[name = string("linear_5_cast_fp16")];
+            tensor<fp16, [2, 1024]> var_131_cast_fp16 = add(x = mu, y = t_cast_fp16)[name = string("op_131_cast_fp16")];
+            tensor<int32, [1]> var_133_axes_0 = const()[name = string("op_133_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [2, 1, 1024]> var_133_cast_fp16 = expand_dims(axes = var_133_axes_0, x = var_131_cast_fp16)[name = string("op_133_cast_fp16")];
+            int32 var_135 = const()[name = string("op_135"), val = int32(1)];
+            bool x_interleave_0 = const()[name = string("x_interleave_0"), val = bool(false)];
+            tensor<fp16, [2, 5, 1024]> x_cast_fp16 = concat(axis = var_135, interleave = x_interleave_0, values = (var_133_cast_fp16, linear_5_cast_fp16, linear_4_cast_fp16))[name = string("x_cast_fp16")];
+            int32 var_137 = const()[name = string("op_137"), val = int32(1)];
+            int32 var_142 = const()[name = string("op_142"), val = int32(-1)];
+            fp16 const_5_promoted_to_fp16 = const()[name = string("const_5_promoted_to_fp16"), val = fp16(-0x1p+0)];
+            tensor<fp16, [2, 5, 1024]> var_156_cast_fp16 = mul(x = x_cast_fp16, y = const_5_promoted_to_fp16)[name = string("op_156_cast_fp16")];
+            bool hidden_states_1_interleave_0 = const()[name = string("hidden_states_1_interleave_0"), val = bool(false)];
+            tensor<fp16, [2, 5, 2048]> hidden_states_1_cast_fp16 = concat(axis = var_142, interleave = hidden_states_1_interleave_0, values = (x_cast_fp16, var_156_cast_fp16))[name = string("hidden_states_1_cast_fp16")];
+            tensor<int32, [1]> hidden_states_3_axes_0 = const()[name = string("hidden_states_3_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [2048]> weight_1_to_fp16 = const()[name = string("weight_1_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8664960)))];
+            fp16 var_145_to_fp16 = const()[name = string("op_145_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [2, 5, 2048]> hidden_states_3_cast_fp16 = layer_norm(axes = hidden_states_3_axes_0, epsilon = var_145_to_fp16, gamma = weight_1_to_fp16, x = hidden_states_1_cast_fp16)[name = string("hidden_states_3_cast_fp16")];
+            tensor<int32, [2]> var_164_split_sizes_0 = const()[name = string("op_164_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
+            int32 var_164_axis_0 = const()[name = string("op_164_axis_0"), val = int32(-1)];
+            tensor<fp16, [2, 5, 1024]> var_164_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_164_cast_fp16_1 = split(axis = var_164_axis_0, split_sizes = var_164_split_sizes_0, x = hidden_states_3_cast_fp16)[name = string("op_164_cast_fp16")];
+            tensor<fp16, [1024, 1024]> layer_decoder_layers_0_self_attn_layer_q_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_0_self_attn_layer_q_proj_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8669120)))];
+            tensor<fp16, [1024]> linear_6_bias_0_to_fp16 = const()[name = string("linear_6_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10766336)))];
+            tensor<fp16, [2, 5, 1024]> linear_6_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_0_self_attn_layer_q_proj_weight_to_fp16, x = var_164_cast_fp16_0)[name = string("linear_6_cast_fp16")];
+            tensor<fp16, [128, 1024]> layer_decoder_layers_0_self_attn_layer_k_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_0_self_attn_layer_k_proj_weight_to_fp16"), val = tensor<fp16, [128, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10768448)))];
+            tensor<fp16, [128]> linear_7_bias_0_to_fp16 = const()[name = string("linear_7_bias_0_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11030656)))];
+            tensor<fp16, [2, 5, 128]> linear_7_cast_fp16 = linear(bias = linear_7_bias_0_to_fp16, weight = layer_decoder_layers_0_self_attn_layer_k_proj_weight_to_fp16, x = var_164_cast_fp16_0)[name = string("linear_7_cast_fp16")];
+            tensor<fp16, [128, 1024]> layer_decoder_layers_0_self_attn_layer_v_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_0_self_attn_layer_v_proj_weight_to_fp16"), val = tensor<fp16, [128, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11030976)))];
+            tensor<fp16, [2, 5, 128]> linear_8_cast_fp16 = linear(bias = linear_7_bias_0_to_fp16, weight = layer_decoder_layers_0_self_attn_layer_v_proj_weight_to_fp16, x = var_164_cast_fp16_0)[name = string("linear_8_cast_fp16")];
+            tensor<int32, [4]> var_183 = const()[name = string("op_183"), val = tensor<int32, [4]>([2, 5, 16, 64])];
+            tensor<fp16, [2, 5, 16, 64]> var_184_cast_fp16 = reshape(shape = var_183, x = linear_6_cast_fp16)[name = string("op_184_cast_fp16")];
+            tensor<int32, [4]> q_1_perm_0 = const()[name = string("q_1_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> var_186 = const()[name = string("op_186"), val = tensor<int32, [4]>([2, 5, 2, 64])];
+            tensor<fp16, [2, 5, 2, 64]> var_187_cast_fp16 = reshape(shape = var_186, x = linear_7_cast_fp16)[name = string("op_187_cast_fp16")];
+            tensor<int32, [4]> k_1_perm_0 = const()[name = string("k_1_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> var_189 = const()[name = string("op_189"), val = tensor<int32, [4]>([2, 5, 2, 64])];
+            tensor<fp16, [2, 5, 2, 64]> var_190_cast_fp16 = reshape(shape = var_189, x = linear_8_cast_fp16)[name = string("op_190_cast_fp16")];
+            tensor<int32, [4]> value_states_3_perm_0 = const()[name = string("value_states_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [5, 64]> cos_to_fp16 = const()[name = string("cos_to_fp16"), val = tensor<fp16, [5, 64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11293184)))];
+            tensor<fp16, [2, 16, 5, 64]> q_1_cast_fp16 = transpose(perm = q_1_perm_0, x = var_184_cast_fp16)[name = string("transpose_16")];
+            tensor<fp16, [2, 16, 5, 64]> var_194_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_to_fp16)[name = string("op_194_cast_fp16")];
+            tensor<int32, [2]> var_195_split_sizes_0 = const()[name = string("op_195_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
+            int32 var_195_axis_0 = const()[name = string("op_195_axis_0"), val = int32(-1)];
+            tensor<fp16, [2, 16, 5, 32]> var_195_cast_fp16_0, tensor<fp16, [2, 16, 5, 32]> var_195_cast_fp16_1 = split(axis = var_195_axis_0, split_sizes = var_195_split_sizes_0, x = q_1_cast_fp16)[name = string("op_195_cast_fp16")];
+            fp16 const_9_promoted_to_fp16 = const()[name = string("const_9_promoted_to_fp16"), val = fp16(-0x1p+0)];
+            tensor<fp16, [2, 16, 5, 32]> var_197_cast_fp16 = mul(x = var_195_cast_fp16_1, y = const_9_promoted_to_fp16)[name = string("op_197_cast_fp16")];
+            bool var_199_interleave_0 = const()[name = string("op_199_interleave_0"), val = bool(false)];
+            tensor<fp16, [2, 16, 5, 64]> var_199_cast_fp16 = concat(axis = var_142, interleave = var_199_interleave_0, values = (var_197_cast_fp16, var_195_cast_fp16_0))[name = string("op_199_cast_fp16")];
+            tensor<fp16, [5, 64]> sin_to_fp16 = const()[name = string("sin_to_fp16"), val = tensor<fp16, [5, 64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11293888)))];
+            tensor<fp16, [2, 16, 5, 64]> var_200_cast_fp16 = mul(x = var_199_cast_fp16, y = sin_to_fp16)[name = string("op_200_cast_fp16")];
+            tensor<fp16, [2, 16, 5, 64]> q_embed_1_cast_fp16 = add(x = var_194_cast_fp16, y = var_200_cast_fp16)[name = string("q_embed_1_cast_fp16")];
+            tensor<fp16, [2, 2, 5, 64]> k_1_cast_fp16 = transpose(perm = k_1_perm_0, x = var_187_cast_fp16)[name = string("transpose_15")];
+            tensor<fp16, [2, 2, 5, 64]> var_202_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_to_fp16)[name = string("op_202_cast_fp16")];
+            tensor<int32, [2]> var_203_split_sizes_0 = const()[name = string("op_203_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
+            int32 var_203_axis_0 = const()[name = string("op_203_axis_0"), val = int32(-1)];
+            tensor<fp16, [2, 2, 5, 32]> var_203_cast_fp16_0, tensor<fp16, [2, 2, 5, 32]> var_203_cast_fp16_1 = split(axis = var_203_axis_0, split_sizes = var_203_split_sizes_0, x = k_1_cast_fp16)[name = string("op_203_cast_fp16")];
+            fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)];
+            tensor<fp16, [2, 2, 5, 32]> var_205_cast_fp16 = mul(x = var_203_cast_fp16_1, y = const_10_promoted_to_fp16)[name = string("op_205_cast_fp16")];
+            bool var_207_interleave_0 = const()[name = string("op_207_interleave_0"), val = bool(false)];
+            tensor<fp16, [2, 2, 5, 64]> var_207_cast_fp16 = concat(axis = var_142, interleave = var_207_interleave_0, values = (var_205_cast_fp16, var_203_cast_fp16_0))[name = string("op_207_cast_fp16")];
+            tensor<fp16, [2, 2, 5, 64]> var_208_cast_fp16 = mul(x = var_207_cast_fp16, y = sin_to_fp16)[name = string("op_208_cast_fp16")];
+            tensor<fp16, [2, 2, 5, 64]> k_embed_1_cast_fp16 = add(x = var_202_cast_fp16, y = var_208_cast_fp16)[name = string("k_embed_1_cast_fp16")];
+            tensor<int32, [2]> var_215_split_sizes_0 = const()[name = string("op_215_split_sizes_0"), val = tensor<int32, [2]>([8, 8])];
+            int32 var_215_axis_0 = const()[name = string("op_215_axis_0"), val = int32(1)];
+            tensor<fp16, [2, 8, 5, 64]> var_215_cast_fp16_0, tensor<fp16, [2, 8, 5, 64]> var_215_cast_fp16_1 = split(axis = var_215_axis_0, split_sizes = var_215_split_sizes_0, x = q_embed_1_cast_fp16)[name = string("op_215_cast_fp16")];
+            tensor<int32, [2]> var_217_split_sizes_0 = const()[name = string("op_217_split_sizes_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_217_axis_0 = const()[name = string("op_217_axis_0"), val = int32(1)];
+            tensor<fp16, [2, 1, 5, 64]> var_217_cast_fp16_0, tensor<fp16, [2, 1, 5, 64]> var_217_cast_fp16_1 = split(axis = var_217_axis_0, split_sizes = var_217_split_sizes_0, x = k_embed_1_cast_fp16)[name = string("op_217_cast_fp16")];
+            tensor<int32, [2]> var_219_split_sizes_0 = const()[name = string("op_219_split_sizes_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_219_axis_0 = const()[name = string("op_219_axis_0"), val = int32(1)];
+            tensor<fp16, [2, 2, 5, 64]> value_states_3_cast_fp16 = transpose(perm = value_states_3_perm_0, x = var_190_cast_fp16)[name = string("transpose_14")];
+            tensor<fp16, [2, 1, 5, 64]> var_219_cast_fp16_0, tensor<fp16, [2, 1, 5, 64]> var_219_cast_fp16_1 = split(axis = var_219_axis_0, split_sizes = var_219_split_sizes_0, x = value_states_3_cast_fp16)[name = string("op_219_cast_fp16")];
+            tensor<int32, [4]> var_217_0_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_217_0_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
+            tensor<fp16, [2, 8, 5, 64]> var_217_0_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_217_0_broadcast_to_same_batch_dims_reps_0, x = var_217_cast_fp16_0)[name = string("op_217_0_broadcast_to_same_batch_dims_cast_fp16")];
+            tensor<int32, [4]> var_219_0_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_219_0_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
+            tensor<fp16, [2, 8, 5, 64]> var_219_0_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_219_0_broadcast_to_same_batch_dims_reps_0, x = var_219_cast_fp16_0)[name = string("op_219_0_broadcast_to_same_batch_dims_cast_fp16")];
+            tensor<fp16, [2, 8, 5, 64]> var_221_cast_fp16 = scaled_dot_product_attention(key = var_217_0_broadcast_to_same_batch_dims_cast_fp16, query = var_215_cast_fp16_0, value = var_219_0_broadcast_to_same_batch_dims_cast_fp16)[name = string("op_221_cast_fp16")];
+            tensor<int32, [4]> var_217_1_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_217_1_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
+            tensor<fp16, [2, 8, 5, 64]> var_217_1_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_217_1_broadcast_to_same_batch_dims_reps_0, x = var_217_cast_fp16_1)[name = string("op_217_1_broadcast_to_same_batch_dims_cast_fp16")];
+            tensor<int32, [4]> var_219_1_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_219_1_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
+            tensor<fp16, [2, 8, 5, 64]> var_219_1_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_219_1_broadcast_to_same_batch_dims_reps_0, x = var_219_cast_fp16_1)[name = string("op_219_1_broadcast_to_same_batch_dims_cast_fp16")];
+            tensor<fp16, [2, 8, 5, 64]> attn_output_1_cast_fp16 = scaled_dot_product_attention(key = var_217_1_broadcast_to_same_batch_dims_cast_fp16, query = var_215_cast_fp16_1, value = var_219_1_broadcast_to_same_batch_dims_cast_fp16)[name = string("attn_output_1_cast_fp16")];
+            bool attn_output_3_interleave_0 = const()[name = string("attn_output_3_interleave_0"), val = bool(false)];
+            tensor<fp16, [2, 16, 5, 64]> attn_output_3_cast_fp16 = concat(axis = var_137, interleave = attn_output_3_interleave_0, values = (var_221_cast_fp16, attn_output_1_cast_fp16))[name = string("attn_output_3_cast_fp16")];
+            tensor<int32, [4]> var_225_perm_0 = const()[name = string("op_225_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_227 = const()[name = string("op_227"), val = tensor<int32, [3]>([2, 5, 1024])];
+            tensor<fp16, [2, 5, 16, 64]> var_225_cast_fp16 = transpose(perm = var_225_perm_0, x = attn_output_3_cast_fp16)[name = string("transpose_13")];
+            tensor<fp16, [2, 5, 1024]> input_17_cast_fp16 = reshape(shape = var_227, x = var_225_cast_fp16)[name = string("input_17_cast_fp16")];
+            tensor<fp16, [1024, 1024]> layer_decoder_layers_0_self_attn_layer_o_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_0_self_attn_layer_o_proj_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11294592)))];
+            tensor<fp16, [2, 5, 1024]> linear_9_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_0_self_attn_layer_o_proj_weight_to_fp16, x = input_17_cast_fp16)[name = string("linear_9_cast_fp16")];
+            tensor<fp16, [2, 5, 1024]> hidden_states_9_cast_fp16 = add(x = x_cast_fp16, y = linear_9_cast_fp16)[name = string("hidden_states_9_cast_fp16")];
+            fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)];
+            tensor<fp16, [2, 5, 1024]> var_234_cast_fp16 = mul(x = hidden_states_9_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_234_cast_fp16")];
+            bool hidden_states_11_interleave_0 = const()[name = string("hidden_states_11_interleave_0"), val = bool(false)];
+            tensor<fp16, [2, 5, 2048]> hidden_states_11_cast_fp16 = concat(axis = var_142, interleave = hidden_states_11_interleave_0, values = (hidden_states_9_cast_fp16, var_234_cast_fp16))[name = string("hidden_states_11_cast_fp16")];
+            tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [2048]> weight_3_to_fp16 = const()[name = string("weight_3_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13391808)))];
+            tensor<fp16, [2, 5, 2048]> hidden_states_13_cast_fp16 = layer_norm(axes = hidden_states_13_axes_0, epsilon = var_145_to_fp16, gamma = weight_3_to_fp16, x = hidden_states_11_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
+            tensor<int32, [2]> var_242_split_sizes_0 = const()[name = string("op_242_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
+            int32 var_242_axis_0 = const()[name = string("op_242_axis_0"), val = int32(-1)];
+            tensor<fp16, [2, 5, 1024]> var_242_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_242_cast_fp16_1 = split(axis = var_242_axis_0, split_sizes = var_242_split_sizes_0, x = hidden_states_13_cast_fp16)[name = string("op_242_cast_fp16")];
+            tensor<fp16, [4096, 1024]> layer_decoder_layers_0_mlp_gate_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_0_mlp_gate_proj_weight_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13395968)))];
+            tensor<fp16, [4096]> linear_10_bias_0_to_fp16 = const()[name = string("linear_10_bias_0_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21784640)))];
+            tensor<fp16, [2, 5, 4096]> linear_10_cast_fp16 = linear(bias = linear_10_bias_0_to_fp16, weight = layer_decoder_layers_0_mlp_gate_proj_weight_to_fp16, x = var_242_cast_fp16_0)[name = string("linear_10_cast_fp16")];
+            tensor<fp16, [2, 5, 4096]> var_250_cast_fp16 = silu(x = linear_10_cast_fp16)[name = string("op_250_cast_fp16")];
+            tensor<fp16, [4096, 1024]> layer_decoder_layers_0_mlp_up_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_0_mlp_up_proj_weight_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21792896)))];
+            tensor<fp16, [2, 5, 4096]> linear_11_cast_fp16 = linear(bias = linear_10_bias_0_to_fp16, weight = layer_decoder_layers_0_mlp_up_proj_weight_to_fp16, x = var_242_cast_fp16_0)[name = string("linear_11_cast_fp16")];
+            tensor<fp16, [2, 5, 4096]> input_23_cast_fp16 = mul(x = var_250_cast_fp16, y = linear_11_cast_fp16)[name = string("input_23_cast_fp16")];
+            tensor<fp16, [1024, 4096]> layer_decoder_layers_0_mlp_down_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_0_mlp_down_proj_weight_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30181568)))];
+            tensor<fp16, [2, 5, 1024]> linear_12_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_0_mlp_down_proj_weight_to_fp16, x = input_23_cast_fp16)[name = string("linear_12_cast_fp16")];
+            tensor<fp16, [2, 5, 1024]> hidden_states_17_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_12_cast_fp16)[name = string("hidden_states_17_cast_fp16")];
+            int32 var_257 = const()[name = string("op_257"), val = int32(1)];
+            int32 var_262 = const()[name = string("op_262"), val = int32(-1)];
+            fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)];
+            tensor<fp16, [2, 5, 1024]> var_276_cast_fp16 = mul(x = hidden_states_17_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_276_cast_fp16")];
+            bool hidden_states_19_interleave_0 = const()[name = string("hidden_states_19_interleave_0"), val = bool(false)];
+            tensor<fp16, [2, 5, 2048]> hidden_states_19_cast_fp16 = concat(axis = var_262, interleave = hidden_states_19_interleave_0, values = (hidden_states_17_cast_fp16, var_276_cast_fp16))[name = string("hidden_states_19_cast_fp16")];
+            tensor<int32, [1]> hidden_states_21_axes_0 = const()[name = string("hidden_states_21_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [2048]> weight_5_to_fp16 = const()[name = string("weight_5_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38570240)))];
+            fp16 var_265_to_fp16 = const()[name = string("op_265_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [2, 5, 2048]> hidden_states_21_cast_fp16 = layer_norm(axes = hidden_states_21_axes_0, epsilon = var_265_to_fp16, gamma = weight_5_to_fp16, x = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
+            tensor<int32, [2]> var_284_split_sizes_0 = const()[name = string("op_284_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
+            int32 var_284_axis_0 = const()[name = string("op_284_axis_0"), val = int32(-1)];
+            tensor<fp16, [2, 5, 1024]> var_284_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_284_cast_fp16_1 = split(axis = var_284_axis_0, split_sizes = var_284_split_sizes_0, x = hidden_states_21_cast_fp16)[name = string("op_284_cast_fp16")];
+            tensor<fp16, [1024, 1024]> layer_decoder_layers_1_self_attn_layer_q_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_1_self_attn_layer_q_proj_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38574400)))];
+            tensor<fp16, [2, 5, 1024]> linear_13_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_1_self_attn_layer_q_proj_weight_to_fp16, x = var_284_cast_fp16_0)[name = string("linear_13_cast_fp16")];
+            tensor<fp16, [128, 1024]> layer_decoder_layers_1_self_attn_layer_k_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_1_self_attn_layer_k_proj_weight_to_fp16"), val = tensor<fp16, [128, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40671616)))];
+            tensor<fp16, [2, 5, 128]> linear_14_cast_fp16 = linear(bias = linear_7_bias_0_to_fp16, weight = layer_decoder_layers_1_self_attn_layer_k_proj_weight_to_fp16, x = var_284_cast_fp16_0)[name = string("linear_14_cast_fp16")];
+            tensor<fp16, [128, 1024]> layer_decoder_layers_1_self_attn_layer_v_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_1_self_attn_layer_v_proj_weight_to_fp16"), val = tensor<fp16, [128, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40933824)))];
+            tensor<fp16, [2, 5, 128]> linear_15_cast_fp16 = linear(bias = linear_7_bias_0_to_fp16, weight = layer_decoder_layers_1_self_attn_layer_v_proj_weight_to_fp16, x = var_284_cast_fp16_0)[name = string("linear_15_cast_fp16")];
+            tensor<int32, [4]> var_303 = const()[name = string("op_303"), val = tensor<int32, [4]>([2, 5, 16, 64])];
+            tensor<fp16, [2, 5, 16, 64]> var_304_cast_fp16 = reshape(shape = var_303, x = linear_13_cast_fp16)[name = string("op_304_cast_fp16")];
+            tensor<int32, [4]> q_9_perm_0 = const()[name = string("q_9_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> var_306 = const()[name = string("op_306"), val = tensor<int32, [4]>([2, 5, 2, 64])];
+            tensor<fp16, [2, 5, 2, 64]> var_307_cast_fp16 = reshape(shape = var_306, x = linear_14_cast_fp16)[name = string("op_307_cast_fp16")];
+            tensor<int32, [4]> k_9_perm_0 = const()[name = string("k_9_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> var_309 = const()[name = string("op_309"), val = tensor<int32, [4]>([2, 5, 2, 64])];
+            tensor<fp16, [2, 5, 2, 64]> var_310_cast_fp16 = reshape(shape = var_309, x = linear_15_cast_fp16)[name = string("op_310_cast_fp16")];
+            tensor<int32, [4]> value_states_9_perm_0 = const()[name = string("value_states_9_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [2, 16, 5, 64]> q_9_cast_fp16 = transpose(perm = q_9_perm_0, x = var_304_cast_fp16)[name = string("transpose_12")];
+            tensor<fp16, [2, 16, 5, 64]> var_314_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_to_fp16)[name = string("op_314_cast_fp16")];
+            tensor<int32, [2]> var_315_split_sizes_0 = const()[name = string("op_315_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
+            int32 var_315_axis_0 = const()[name = string("op_315_axis_0"), val = int32(-1)];
+            tensor<fp16, [2, 16, 5, 32]> var_315_cast_fp16_0, tensor<fp16, [2, 16, 5, 32]> var_315_cast_fp16_1 = split(axis = var_315_axis_0, split_sizes = var_315_split_sizes_0, x = q_9_cast_fp16)[name = string("op_315_cast_fp16")];
+            fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)];
+            tensor<fp16, [2, 16, 5, 32]> var_317_cast_fp16 = mul(x = var_315_cast_fp16_1, y = const_17_promoted_to_fp16)[name = string("op_317_cast_fp16")];
+            bool var_319_interleave_0 = const()[name = string("op_319_interleave_0"), val = bool(false)];
+            tensor<fp16, [2, 16, 5, 64]> var_319_cast_fp16 = concat(axis = var_262, interleave = var_319_interleave_0, values = (var_317_cast_fp16, var_315_cast_fp16_0))[name = string("op_319_cast_fp16")];
+            tensor<fp16, [2, 16, 5, 64]> var_320_cast_fp16 = mul(x = var_319_cast_fp16, y = sin_to_fp16)[name = string("op_320_cast_fp16")];
+            tensor<fp16, [2, 16, 5, 64]> q_embed_3_cast_fp16 = add(x = var_314_cast_fp16, y = var_320_cast_fp16)[name = string("q_embed_3_cast_fp16")];
+            tensor<fp16, [2, 2, 5, 64]> k_9_cast_fp16 = transpose(perm = k_9_perm_0, x = var_307_cast_fp16)[name = string("transpose_11")];
+            tensor<fp16, [2, 2, 5, 64]> var_322_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_to_fp16)[name = string("op_322_cast_fp16")];
+            tensor<int32, [2]> var_323_split_sizes_0 = const()[name = string("op_323_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
+            int32 var_323_axis_0 = const()[name = string("op_323_axis_0"), val = int32(-1)];
+            tensor<fp16, [2, 2, 5, 32]> var_323_cast_fp16_0, tensor<fp16, [2, 2, 5, 32]> var_323_cast_fp16_1 = split(axis = var_323_axis_0, split_sizes = var_323_split_sizes_0, x = k_9_cast_fp16)[name = string("op_323_cast_fp16")];
+            fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)];
+            tensor<fp16, [2, 2, 5, 32]> var_325_cast_fp16 = mul(x = var_323_cast_fp16_1, y = const_18_promoted_to_fp16)[name = string("op_325_cast_fp16")];
+            bool var_327_interleave_0 = const()[name = string("op_327_interleave_0"), val = bool(false)];
+            tensor<fp16, [2, 2, 5, 64]> var_327_cast_fp16 = concat(axis = var_262, interleave = var_327_interleave_0, values = (var_325_cast_fp16, var_323_cast_fp16_0))[name = string("op_327_cast_fp16")];
+            tensor<fp16, [2, 2, 5, 64]> var_328_cast_fp16 = mul(x = var_327_cast_fp16, y = sin_to_fp16)[name = string("op_328_cast_fp16")];
+            tensor<fp16, [2, 2, 5, 64]> k_embed_3_cast_fp16 = add(x = var_322_cast_fp16, y = var_328_cast_fp16)[name = string("k_embed_3_cast_fp16")];
+            tensor<int32, [2]> var_335_split_sizes_0 = const()[name = string("op_335_split_sizes_0"), val = tensor<int32, [2]>([8, 8])];
+            int32 var_335_axis_0 = const()[name = string("op_335_axis_0"), val = int32(1)];
+            tensor<fp16, [2, 8, 5, 64]> var_335_cast_fp16_0, tensor<fp16, [2, 8, 5, 64]> var_335_cast_fp16_1 = split(axis = var_335_axis_0, split_sizes = var_335_split_sizes_0, x = q_embed_3_cast_fp16)[name = string("op_335_cast_fp16")];
+            tensor<int32, [2]> var_337_split_sizes_0 = const()[name = string("op_337_split_sizes_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_337_axis_0 = const()[name = string("op_337_axis_0"), val = int32(1)];
+            tensor<fp16, [2, 1, 5, 64]> var_337_cast_fp16_0, tensor<fp16, [2, 1, 5, 64]> var_337_cast_fp16_1 = split(axis = var_337_axis_0, split_sizes = var_337_split_sizes_0, x = k_embed_3_cast_fp16)[name = string("op_337_cast_fp16")];
+            tensor<int32, [2]> var_339_split_sizes_0 = const()[name = string("op_339_split_sizes_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_339_axis_0 = const()[name = string("op_339_axis_0"), val = int32(1)];
+            tensor<fp16, [2, 2, 5, 64]> value_states_9_cast_fp16 = transpose(perm = value_states_9_perm_0, x = var_310_cast_fp16)[name = string("transpose_10")];
+            tensor<fp16, [2, 1, 5, 64]> var_339_cast_fp16_0, tensor<fp16, [2, 1, 5, 64]> var_339_cast_fp16_1 = split(axis = var_339_axis_0, split_sizes = var_339_split_sizes_0, x = value_states_9_cast_fp16)[name = string("op_339_cast_fp16")];
+            tensor<int32, [4]> var_337_0_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_337_0_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
+            tensor<fp16, [2, 8, 5, 64]> var_337_0_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_337_0_broadcast_to_same_batch_dims_reps_0, x = var_337_cast_fp16_0)[name = string("op_337_0_broadcast_to_same_batch_dims_cast_fp16")];
+            tensor<int32, [4]> var_339_0_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_339_0_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
+            tensor<fp16, [2, 8, 5, 64]> var_339_0_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_339_0_broadcast_to_same_batch_dims_reps_0, x = var_339_cast_fp16_0)[name = string("op_339_0_broadcast_to_same_batch_dims_cast_fp16")];
+            tensor<fp16, [2, 8, 5, 64]> var_341_cast_fp16 = scaled_dot_product_attention(key = var_337_0_broadcast_to_same_batch_dims_cast_fp16, query = var_335_cast_fp16_0, value = var_339_0_broadcast_to_same_batch_dims_cast_fp16)[name = string("op_341_cast_fp16")];
+            tensor<int32, [4]> var_337_1_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_337_1_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
+            tensor<fp16, [2, 8, 5, 64]> var_337_1_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_337_1_broadcast_to_same_batch_dims_reps_0, x = var_337_cast_fp16_1)[name = string("op_337_1_broadcast_to_same_batch_dims_cast_fp16")];
+            tensor<int32, [4]> var_339_1_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_339_1_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
+            tensor<fp16, [2, 8, 5, 64]> var_339_1_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_339_1_broadcast_to_same_batch_dims_reps_0, x = var_339_cast_fp16_1)[name = string("op_339_1_broadcast_to_same_batch_dims_cast_fp16")];
+            tensor<fp16, [2, 8, 5, 64]> attn_output_7_cast_fp16 = scaled_dot_product_attention(key = var_337_1_broadcast_to_same_batch_dims_cast_fp16, query = var_335_cast_fp16_1, value = var_339_1_broadcast_to_same_batch_dims_cast_fp16)[name = string("attn_output_7_cast_fp16")];
+            bool attn_output_9_interleave_0 = const()[name = string("attn_output_9_interleave_0"), val = bool(false)];
+            tensor<fp16, [2, 16, 5, 64]> attn_output_9_cast_fp16 = concat(axis = var_257, interleave = attn_output_9_interleave_0, values = (var_341_cast_fp16, attn_output_7_cast_fp16))[name = string("attn_output_9_cast_fp16")];
+            tensor<int32, [4]> var_345_perm_0 = const()[name = string("op_345_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_347 = const()[name = string("op_347"), val = tensor<int32, [3]>([2, 5, 1024])];
+            tensor<fp16, [2, 5, 16, 64]> var_345_cast_fp16 = transpose(perm = var_345_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_9")];
+            tensor<fp16, [2, 5, 1024]> input_25_cast_fp16 = reshape(shape = var_347, x = var_345_cast_fp16)[name = string("input_25_cast_fp16")];
+            tensor<fp16, [1024, 1024]> layer_decoder_layers_1_self_attn_layer_o_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_1_self_attn_layer_o_proj_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41196032)))];
+            tensor<fp16, [2, 5, 1024]> linear_16_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_1_self_attn_layer_o_proj_weight_to_fp16, x = input_25_cast_fp16)[name = string("linear_16_cast_fp16")];
+            tensor<fp16, [2, 5, 1024]> hidden_states_27_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_16_cast_fp16)[name = string("hidden_states_27_cast_fp16")];
+            fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)];
+            tensor<fp16, [2, 5, 1024]> var_354_cast_fp16 = mul(x = hidden_states_27_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_354_cast_fp16")];
+            bool hidden_states_29_interleave_0 = const()[name = string("hidden_states_29_interleave_0"), val = bool(false)];
+            tensor<fp16, [2, 5, 2048]> hidden_states_29_cast_fp16 = concat(axis = var_262, interleave = hidden_states_29_interleave_0, values = (hidden_states_27_cast_fp16, var_354_cast_fp16))[name = string("hidden_states_29_cast_fp16")];
+            tensor<int32, [1]> hidden_states_31_axes_0 = const()[name = string("hidden_states_31_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [2048]> weight_7_to_fp16 = const()[name = string("weight_7_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43293248)))];
+            tensor<fp16, [2, 5, 2048]> hidden_states_31_cast_fp16 = layer_norm(axes = hidden_states_31_axes_0, epsilon = var_265_to_fp16, gamma = weight_7_to_fp16, x = hidden_states_29_cast_fp16)[name = string("hidden_states_31_cast_fp16")];
+            tensor<int32, [2]> var_362_split_sizes_0 = const()[name = string("op_362_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
+            int32 var_362_axis_0 = const()[name = string("op_362_axis_0"), val = int32(-1)];
+            tensor<fp16, [2, 5, 1024]> var_362_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_362_cast_fp16_1 = split(axis = var_362_axis_0, split_sizes = var_362_split_sizes_0, x = hidden_states_31_cast_fp16)[name = string("op_362_cast_fp16")];
+            tensor<fp16, [4096, 1024]> layer_decoder_layers_1_mlp_gate_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_1_mlp_gate_proj_weight_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43297408)))];
+            tensor<fp16, [2, 5, 4096]> linear_17_cast_fp16 = linear(bias = linear_10_bias_0_to_fp16, weight = layer_decoder_layers_1_mlp_gate_proj_weight_to_fp16, x = var_362_cast_fp16_0)[name = string("linear_17_cast_fp16")];
+            tensor<fp16, [2, 5, 4096]> var_370_cast_fp16 = silu(x = linear_17_cast_fp16)[name = string("op_370_cast_fp16")];
+            tensor<fp16, [4096, 1024]> layer_decoder_layers_1_mlp_up_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_1_mlp_up_proj_weight_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51686080)))];
+            tensor<fp16, [2, 5, 4096]> linear_18_cast_fp16 = linear(bias = linear_10_bias_0_to_fp16, weight = layer_decoder_layers_1_mlp_up_proj_weight_to_fp16, x = var_362_cast_fp16_0)[name = string("linear_18_cast_fp16")];
+            tensor<fp16, [2, 5, 4096]> input_31_cast_fp16 = mul(x = var_370_cast_fp16, y = linear_18_cast_fp16)[name = string("input_31_cast_fp16")];
+            tensor<fp16, [1024, 4096]> layer_decoder_layers_1_mlp_down_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_1_mlp_down_proj_weight_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60074752)))];
+            tensor<fp16, [2, 5, 1024]> linear_19_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_1_mlp_down_proj_weight_to_fp16, x = input_31_cast_fp16)[name = string("linear_19_cast_fp16")];
+            tensor<fp16, [2, 5, 1024]> hidden_states_35_cast_fp16 = add(x = hidden_states_27_cast_fp16, y = linear_19_cast_fp16)[name = string("hidden_states_35_cast_fp16")];
+            int32 var_377 = const()[name = string("op_377"), val = int32(1)];
+            int32 var_382 = const()[name = string("op_382"), val = int32(-1)];
+            fp16 const_21_promoted_to_fp16 = const()[name = string("const_21_promoted_to_fp16"), val = fp16(-0x1p+0)];
+            tensor<fp16, [2, 5, 1024]> var_396_cast_fp16 = mul(x = hidden_states_35_cast_fp16, y = const_21_promoted_to_fp16)[name = string("op_396_cast_fp16")];
+            bool hidden_states_37_interleave_0 = const()[name = string("hidden_states_37_interleave_0"), val = bool(false)];
+            tensor<fp16, [2, 5, 2048]> hidden_states_37_cast_fp16 = concat(axis = var_382, interleave = hidden_states_37_interleave_0, values = (hidden_states_35_cast_fp16, var_396_cast_fp16))[name = string("hidden_states_37_cast_fp16")];
+            tensor<int32, [1]> hidden_states_39_axes_0 = const()[name = string("hidden_states_39_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [2048]> weight_9_to_fp16 = const()[name = string("weight_9_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68463424)))];
+            fp16 var_385_to_fp16 = const()[name = string("op_385_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [2, 5, 2048]> hidden_states_39_cast_fp16 = layer_norm(axes = hidden_states_39_axes_0, epsilon = var_385_to_fp16, gamma = weight_9_to_fp16, x = hidden_states_37_cast_fp16)[name = string("hidden_states_39_cast_fp16")];
+            tensor<int32, [2]> var_404_split_sizes_0 = const()[name = string("op_404_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
+            int32 var_404_axis_0 = const()[name = string("op_404_axis_0"), val = int32(-1)];
+            tensor<fp16, [2, 5, 1024]> var_404_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_404_cast_fp16_1 = split(axis = var_404_axis_0, split_sizes = var_404_split_sizes_0, x = hidden_states_39_cast_fp16)[name = string("op_404_cast_fp16")];
+            tensor<fp16, [1024, 1024]> layer_decoder_layers_2_self_attn_layer_q_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_2_self_attn_layer_q_proj_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68467584)))];
+            tensor<fp16, [2, 5, 1024]> linear_20_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_2_self_attn_layer_q_proj_weight_to_fp16, x = var_404_cast_fp16_0)[name = string("linear_20_cast_fp16")];
| 273 |
+
tensor<fp16, [128, 1024]> layer_decoder_layers_2_self_attn_layer_k_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_2_self_attn_layer_k_proj_weight_to_fp16"), val = tensor<fp16, [128, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70564800)))];
|
| 274 |
+
tensor<fp16, [2, 5, 128]> linear_21_cast_fp16 = linear(bias = linear_7_bias_0_to_fp16, weight = layer_decoder_layers_2_self_attn_layer_k_proj_weight_to_fp16, x = var_404_cast_fp16_0)[name = string("linear_21_cast_fp16")];
|
| 275 |
+
tensor<fp16, [128, 1024]> layer_decoder_layers_2_self_attn_layer_v_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_2_self_attn_layer_v_proj_weight_to_fp16"), val = tensor<fp16, [128, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70827008)))];
|
| 276 |
+
tensor<fp16, [2, 5, 128]> linear_22_cast_fp16 = linear(bias = linear_7_bias_0_to_fp16, weight = layer_decoder_layers_2_self_attn_layer_v_proj_weight_to_fp16, x = var_404_cast_fp16_0)[name = string("linear_22_cast_fp16")];
|
| 277 |
+
tensor<int32, [4]> var_423 = const()[name = string("op_423"), val = tensor<int32, [4]>([2, 5, 16, 64])];
|
| 278 |
+
tensor<fp16, [2, 5, 16, 64]> var_424_cast_fp16 = reshape(shape = var_423, x = linear_20_cast_fp16)[name = string("op_424_cast_fp16")];
|
| 279 |
+
tensor<int32, [4]> q_17_perm_0 = const()[name = string("q_17_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 280 |
+
tensor<int32, [4]> var_426 = const()[name = string("op_426"), val = tensor<int32, [4]>([2, 5, 2, 64])];
|
| 281 |
+
tensor<fp16, [2, 5, 2, 64]> var_427_cast_fp16 = reshape(shape = var_426, x = linear_21_cast_fp16)[name = string("op_427_cast_fp16")];
|
| 282 |
+
tensor<int32, [4]> k_17_perm_0 = const()[name = string("k_17_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 283 |
+
tensor<int32, [4]> var_429 = const()[name = string("op_429"), val = tensor<int32, [4]>([2, 5, 2, 64])];
|
| 284 |
+
tensor<fp16, [2, 5, 2, 64]> var_430_cast_fp16 = reshape(shape = var_429, x = linear_22_cast_fp16)[name = string("op_430_cast_fp16")];
|
| 285 |
+
tensor<int32, [4]> value_states_15_perm_0 = const()[name = string("value_states_15_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 286 |
+
tensor<fp16, [2, 16, 5, 64]> q_17_cast_fp16 = transpose(perm = q_17_perm_0, x = var_424_cast_fp16)[name = string("transpose_8")];
|
| 287 |
+
tensor<fp16, [2, 16, 5, 64]> var_434_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_to_fp16)[name = string("op_434_cast_fp16")];
|
| 288 |
+
tensor<int32, [2]> var_435_split_sizes_0 = const()[name = string("op_435_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
|
| 289 |
+
int32 var_435_axis_0 = const()[name = string("op_435_axis_0"), val = int32(-1)];
|
| 290 |
+
tensor<fp16, [2, 16, 5, 32]> var_435_cast_fp16_0, tensor<fp16, [2, 16, 5, 32]> var_435_cast_fp16_1 = split(axis = var_435_axis_0, split_sizes = var_435_split_sizes_0, x = q_17_cast_fp16)[name = string("op_435_cast_fp16")];
|
| 291 |
+
fp16 const_25_promoted_to_fp16 = const()[name = string("const_25_promoted_to_fp16"), val = fp16(-0x1p+0)];
|
| 292 |
+
tensor<fp16, [2, 16, 5, 32]> var_437_cast_fp16 = mul(x = var_435_cast_fp16_1, y = const_25_promoted_to_fp16)[name = string("op_437_cast_fp16")];
|
| 293 |
+
bool var_439_interleave_0 = const()[name = string("op_439_interleave_0"), val = bool(false)];
|
| 294 |
+
tensor<fp16, [2, 16, 5, 64]> var_439_cast_fp16 = concat(axis = var_382, interleave = var_439_interleave_0, values = (var_437_cast_fp16, var_435_cast_fp16_0))[name = string("op_439_cast_fp16")];
|
| 295 |
+
tensor<fp16, [2, 16, 5, 64]> var_440_cast_fp16 = mul(x = var_439_cast_fp16, y = sin_to_fp16)[name = string("op_440_cast_fp16")];
|
| 296 |
+
tensor<fp16, [2, 16, 5, 64]> q_embed_5_cast_fp16 = add(x = var_434_cast_fp16, y = var_440_cast_fp16)[name = string("q_embed_5_cast_fp16")];
|
| 297 |
+
tensor<fp16, [2, 2, 5, 64]> k_17_cast_fp16 = transpose(perm = k_17_perm_0, x = var_427_cast_fp16)[name = string("transpose_7")];
|
| 298 |
+
tensor<fp16, [2, 2, 5, 64]> var_442_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_to_fp16)[name = string("op_442_cast_fp16")];
|
| 299 |
+
tensor<int32, [2]> var_443_split_sizes_0 = const()[name = string("op_443_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
|
| 300 |
+
int32 var_443_axis_0 = const()[name = string("op_443_axis_0"), val = int32(-1)];
|
| 301 |
+
tensor<fp16, [2, 2, 5, 32]> var_443_cast_fp16_0, tensor<fp16, [2, 2, 5, 32]> var_443_cast_fp16_1 = split(axis = var_443_axis_0, split_sizes = var_443_split_sizes_0, x = k_17_cast_fp16)[name = string("op_443_cast_fp16")];
|
| 302 |
+
fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)];
|
| 303 |
+
tensor<fp16, [2, 2, 5, 32]> var_445_cast_fp16 = mul(x = var_443_cast_fp16_1, y = const_26_promoted_to_fp16)[name = string("op_445_cast_fp16")];
|
| 304 |
+
bool var_447_interleave_0 = const()[name = string("op_447_interleave_0"), val = bool(false)];
|
| 305 |
+
tensor<fp16, [2, 2, 5, 64]> var_447_cast_fp16 = concat(axis = var_382, interleave = var_447_interleave_0, values = (var_445_cast_fp16, var_443_cast_fp16_0))[name = string("op_447_cast_fp16")];
|
| 306 |
+
tensor<fp16, [2, 2, 5, 64]> var_448_cast_fp16 = mul(x = var_447_cast_fp16, y = sin_to_fp16)[name = string("op_448_cast_fp16")];
|
| 307 |
+
tensor<fp16, [2, 2, 5, 64]> k_embed_5_cast_fp16 = add(x = var_442_cast_fp16, y = var_448_cast_fp16)[name = string("k_embed_5_cast_fp16")];
|
| 308 |
+
tensor<int32, [2]> var_455_split_sizes_0 = const()[name = string("op_455_split_sizes_0"), val = tensor<int32, [2]>([8, 8])];
|
| 309 |
+
int32 var_455_axis_0 = const()[name = string("op_455_axis_0"), val = int32(1)];
|
| 310 |
+
tensor<fp16, [2, 8, 5, 64]> var_455_cast_fp16_0, tensor<fp16, [2, 8, 5, 64]> var_455_cast_fp16_1 = split(axis = var_455_axis_0, split_sizes = var_455_split_sizes_0, x = q_embed_5_cast_fp16)[name = string("op_455_cast_fp16")];
|
| 311 |
+
tensor<int32, [2]> var_457_split_sizes_0 = const()[name = string("op_457_split_sizes_0"), val = tensor<int32, [2]>([1, 1])];
|
| 312 |
+
int32 var_457_axis_0 = const()[name = string("op_457_axis_0"), val = int32(1)];
|
| 313 |
+
tensor<fp16, [2, 1, 5, 64]> var_457_cast_fp16_0, tensor<fp16, [2, 1, 5, 64]> var_457_cast_fp16_1 = split(axis = var_457_axis_0, split_sizes = var_457_split_sizes_0, x = k_embed_5_cast_fp16)[name = string("op_457_cast_fp16")];
|
| 314 |
+
tensor<int32, [2]> var_459_split_sizes_0 = const()[name = string("op_459_split_sizes_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_459_axis_0 = const()[name = string("op_459_axis_0"), val = int32(1)];
tensor<fp16, [2, 2, 5, 64]> value_states_15_cast_fp16 = transpose(perm = value_states_15_perm_0, x = var_430_cast_fp16)[name = string("transpose_6")];
tensor<fp16, [2, 1, 5, 64]> var_459_cast_fp16_0, tensor<fp16, [2, 1, 5, 64]> var_459_cast_fp16_1 = split(axis = var_459_axis_0, split_sizes = var_459_split_sizes_0, x = value_states_15_cast_fp16)[name = string("op_459_cast_fp16")];
tensor<int32, [4]> var_457_0_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_457_0_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
tensor<fp16, [2, 8, 5, 64]> var_457_0_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_457_0_broadcast_to_same_batch_dims_reps_0, x = var_457_cast_fp16_0)[name = string("op_457_0_broadcast_to_same_batch_dims_cast_fp16")];
tensor<int32, [4]> var_459_0_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_459_0_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
tensor<fp16, [2, 8, 5, 64]> var_459_0_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_459_0_broadcast_to_same_batch_dims_reps_0, x = var_459_cast_fp16_0)[name = string("op_459_0_broadcast_to_same_batch_dims_cast_fp16")];
tensor<fp16, [2, 8, 5, 64]> var_461_cast_fp16 = scaled_dot_product_attention(key = var_457_0_broadcast_to_same_batch_dims_cast_fp16, query = var_455_cast_fp16_0, value = var_459_0_broadcast_to_same_batch_dims_cast_fp16)[name = string("op_461_cast_fp16")];
tensor<int32, [4]> var_457_1_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_457_1_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
tensor<fp16, [2, 8, 5, 64]> var_457_1_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_457_1_broadcast_to_same_batch_dims_reps_0, x = var_457_cast_fp16_1)[name = string("op_457_1_broadcast_to_same_batch_dims_cast_fp16")];
tensor<int32, [4]> var_459_1_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_459_1_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
tensor<fp16, [2, 8, 5, 64]> var_459_1_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_459_1_broadcast_to_same_batch_dims_reps_0, x = var_459_cast_fp16_1)[name = string("op_459_1_broadcast_to_same_batch_dims_cast_fp16")];
tensor<fp16, [2, 8, 5, 64]> attn_output_13_cast_fp16 = scaled_dot_product_attention(key = var_457_1_broadcast_to_same_batch_dims_cast_fp16, query = var_455_cast_fp16_1, value = var_459_1_broadcast_to_same_batch_dims_cast_fp16)[name = string("attn_output_13_cast_fp16")];
bool attn_output_15_interleave_0 = const()[name = string("attn_output_15_interleave_0"), val = bool(false)];
tensor<fp16, [2, 16, 5, 64]> attn_output_15_cast_fp16 = concat(axis = var_377, interleave = attn_output_15_interleave_0, values = (var_461_cast_fp16, attn_output_13_cast_fp16))[name = string("attn_output_15_cast_fp16")];
tensor<int32, [4]> var_465_perm_0 = const()[name = string("op_465_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_467 = const()[name = string("op_467"), val = tensor<int32, [3]>([2, 5, 1024])];
tensor<fp16, [2, 5, 16, 64]> var_465_cast_fp16 = transpose(perm = var_465_perm_0, x = attn_output_15_cast_fp16)[name = string("transpose_5")];
tensor<fp16, [2, 5, 1024]> input_33_cast_fp16 = reshape(shape = var_467, x = var_465_cast_fp16)[name = string("input_33_cast_fp16")];
tensor<fp16, [1024, 1024]> layer_decoder_layers_2_self_attn_layer_o_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_2_self_attn_layer_o_proj_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71089216)))];
tensor<fp16, [2, 5, 1024]> linear_23_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_2_self_attn_layer_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = string("linear_23_cast_fp16")];
tensor<fp16, [2, 5, 1024]> hidden_states_45_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = linear_23_cast_fp16)[name = string("hidden_states_45_cast_fp16")];
fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)];
tensor<fp16, [2, 5, 1024]> var_474_cast_fp16 = mul(x = hidden_states_45_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_474_cast_fp16")];
bool hidden_states_47_interleave_0 = const()[name = string("hidden_states_47_interleave_0"), val = bool(false)];
tensor<fp16, [2, 5, 2048]> hidden_states_47_cast_fp16 = concat(axis = var_382, interleave = hidden_states_47_interleave_0, values = (hidden_states_45_cast_fp16, var_474_cast_fp16))[name = string("hidden_states_47_cast_fp16")];
tensor<int32, [1]> hidden_states_49_axes_0 = const()[name = string("hidden_states_49_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> weight_11_to_fp16 = const()[name = string("weight_11_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73186432)))];
tensor<fp16, [2, 5, 2048]> hidden_states_49_cast_fp16 = layer_norm(axes = hidden_states_49_axes_0, epsilon = var_385_to_fp16, gamma = weight_11_to_fp16, x = hidden_states_47_cast_fp16)[name = string("hidden_states_49_cast_fp16")];
tensor<int32, [2]> var_482_split_sizes_0 = const()[name = string("op_482_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
int32 var_482_axis_0 = const()[name = string("op_482_axis_0"), val = int32(-1)];
tensor<fp16, [2, 5, 1024]> var_482_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_482_cast_fp16_1 = split(axis = var_482_axis_0, split_sizes = var_482_split_sizes_0, x = hidden_states_49_cast_fp16)[name = string("op_482_cast_fp16")];
tensor<fp16, [4096, 1024]> layer_decoder_layers_2_mlp_gate_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_2_mlp_gate_proj_weight_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73190592)))];
tensor<fp16, [2, 5, 4096]> linear_24_cast_fp16 = linear(bias = linear_10_bias_0_to_fp16, weight = layer_decoder_layers_2_mlp_gate_proj_weight_to_fp16, x = var_482_cast_fp16_0)[name = string("linear_24_cast_fp16")];
tensor<fp16, [2, 5, 4096]> var_490_cast_fp16 = silu(x = linear_24_cast_fp16)[name = string("op_490_cast_fp16")];
tensor<fp16, [4096, 1024]> layer_decoder_layers_2_mlp_up_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_2_mlp_up_proj_weight_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81579264)))];
tensor<fp16, [2, 5, 4096]> linear_25_cast_fp16 = linear(bias = linear_10_bias_0_to_fp16, weight = layer_decoder_layers_2_mlp_up_proj_weight_to_fp16, x = var_482_cast_fp16_0)[name = string("linear_25_cast_fp16")];
tensor<fp16, [2, 5, 4096]> input_39_cast_fp16 = mul(x = var_490_cast_fp16, y = linear_25_cast_fp16)[name = string("input_39_cast_fp16")];
tensor<fp16, [1024, 4096]> layer_decoder_layers_2_mlp_down_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_2_mlp_down_proj_weight_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89967936)))];
tensor<fp16, [2, 5, 1024]> linear_26_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_2_mlp_down_proj_weight_to_fp16, x = input_39_cast_fp16)[name = string("linear_26_cast_fp16")];
tensor<fp16, [2, 5, 1024]> hidden_states_53_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = linear_26_cast_fp16)[name = string("hidden_states_53_cast_fp16")];
int32 var_497 = const()[name = string("op_497"), val = int32(1)];
int32 var_502 = const()[name = string("op_502"), val = int32(-1)];
fp16 const_29_promoted_to_fp16 = const()[name = string("const_29_promoted_to_fp16"), val = fp16(-0x1p+0)];
tensor<fp16, [2, 5, 1024]> var_516_cast_fp16 = mul(x = hidden_states_53_cast_fp16, y = const_29_promoted_to_fp16)[name = string("op_516_cast_fp16")];
bool hidden_states_55_interleave_0 = const()[name = string("hidden_states_55_interleave_0"), val = bool(false)];
tensor<fp16, [2, 5, 2048]> hidden_states_55_cast_fp16 = concat(axis = var_502, interleave = hidden_states_55_interleave_0, values = (hidden_states_53_cast_fp16, var_516_cast_fp16))[name = string("hidden_states_55_cast_fp16")];
tensor<int32, [1]> hidden_states_57_axes_0 = const()[name = string("hidden_states_57_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> weight_13_to_fp16 = const()[name = string("weight_13_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98356608)))];
fp16 var_505_to_fp16 = const()[name = string("op_505_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [2, 5, 2048]> hidden_states_57_cast_fp16 = layer_norm(axes = hidden_states_57_axes_0, epsilon = var_505_to_fp16, gamma = weight_13_to_fp16, x = hidden_states_55_cast_fp16)[name = string("hidden_states_57_cast_fp16")];
tensor<int32, [2]> var_524_split_sizes_0 = const()[name = string("op_524_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
int32 var_524_axis_0 = const()[name = string("op_524_axis_0"), val = int32(-1)];
tensor<fp16, [2, 5, 1024]> var_524_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_524_cast_fp16_1 = split(axis = var_524_axis_0, split_sizes = var_524_split_sizes_0, x = hidden_states_57_cast_fp16)[name = string("op_524_cast_fp16")];
tensor<fp16, [1024, 1024]> layer_decoder_layers_3_self_attn_layer_q_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_3_self_attn_layer_q_proj_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98360768)))];
tensor<fp16, [2, 5, 1024]> linear_27_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_3_self_attn_layer_q_proj_weight_to_fp16, x = var_524_cast_fp16_0)[name = string("linear_27_cast_fp16")];
tensor<fp16, [128, 1024]> layer_decoder_layers_3_self_attn_layer_k_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_3_self_attn_layer_k_proj_weight_to_fp16"), val = tensor<fp16, [128, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100457984)))];
tensor<fp16, [2, 5, 128]> linear_28_cast_fp16 = linear(bias = linear_7_bias_0_to_fp16, weight = layer_decoder_layers_3_self_attn_layer_k_proj_weight_to_fp16, x = var_524_cast_fp16_0)[name = string("linear_28_cast_fp16")];
tensor<fp16, [128, 1024]> layer_decoder_layers_3_self_attn_layer_v_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_3_self_attn_layer_v_proj_weight_to_fp16"), val = tensor<fp16, [128, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100720192)))];
tensor<fp16, [2, 5, 128]> linear_29_cast_fp16 = linear(bias = linear_7_bias_0_to_fp16, weight = layer_decoder_layers_3_self_attn_layer_v_proj_weight_to_fp16, x = var_524_cast_fp16_0)[name = string("linear_29_cast_fp16")];
tensor<int32, [4]> var_543 = const()[name = string("op_543"), val = tensor<int32, [4]>([2, 5, 16, 64])];
tensor<fp16, [2, 5, 16, 64]> var_544_cast_fp16 = reshape(shape = var_543, x = linear_27_cast_fp16)[name = string("op_544_cast_fp16")];
tensor<int32, [4]> q_25_perm_0 = const()[name = string("q_25_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [4]> var_546 = const()[name = string("op_546"), val = tensor<int32, [4]>([2, 5, 2, 64])];
tensor<fp16, [2, 5, 2, 64]> var_547_cast_fp16 = reshape(shape = var_546, x = linear_28_cast_fp16)[name = string("op_547_cast_fp16")];
tensor<int32, [4]> k_25_perm_0 = const()[name = string("k_25_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [4]> var_549 = const()[name = string("op_549"), val = tensor<int32, [4]>([2, 5, 2, 64])];
tensor<fp16, [2, 5, 2, 64]> var_550_cast_fp16 = reshape(shape = var_549, x = linear_29_cast_fp16)[name = string("op_550_cast_fp16")];
tensor<int32, [4]> value_states_21_perm_0 = const()[name = string("value_states_21_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [2, 16, 5, 64]> q_25_cast_fp16 = transpose(perm = q_25_perm_0, x = var_544_cast_fp16)[name = string("transpose_4")];
tensor<fp16, [2, 16, 5, 64]> var_554_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_to_fp16)[name = string("op_554_cast_fp16")];
tensor<int32, [2]> var_555_split_sizes_0 = const()[name = string("op_555_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
int32 var_555_axis_0 = const()[name = string("op_555_axis_0"), val = int32(-1)];
tensor<fp16, [2, 16, 5, 32]> var_555_cast_fp16_0, tensor<fp16, [2, 16, 5, 32]> var_555_cast_fp16_1 = split(axis = var_555_axis_0, split_sizes = var_555_split_sizes_0, x = q_25_cast_fp16)[name = string("op_555_cast_fp16")];
fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)];
tensor<fp16, [2, 16, 5, 32]> var_557_cast_fp16 = mul(x = var_555_cast_fp16_1, y = const_33_promoted_to_fp16)[name = string("op_557_cast_fp16")];
bool var_559_interleave_0 = const()[name = string("op_559_interleave_0"), val = bool(false)];
tensor<fp16, [2, 16, 5, 64]> var_559_cast_fp16 = concat(axis = var_502, interleave = var_559_interleave_0, values = (var_557_cast_fp16, var_555_cast_fp16_0))[name = string("op_559_cast_fp16")];
tensor<fp16, [2, 16, 5, 64]> var_560_cast_fp16 = mul(x = var_559_cast_fp16, y = sin_to_fp16)[name = string("op_560_cast_fp16")];
tensor<fp16, [2, 16, 5, 64]> q_embed_cast_fp16 = add(x = var_554_cast_fp16, y = var_560_cast_fp16)[name = string("q_embed_cast_fp16")];
tensor<fp16, [2, 2, 5, 64]> k_25_cast_fp16 = transpose(perm = k_25_perm_0, x = var_547_cast_fp16)[name = string("transpose_3")];
tensor<fp16, [2, 2, 5, 64]> var_562_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_to_fp16)[name = string("op_562_cast_fp16")];
tensor<int32, [2]> var_563_split_sizes_0 = const()[name = string("op_563_split_sizes_0"), val = tensor<int32, [2]>([32, 32])];
int32 var_563_axis_0 = const()[name = string("op_563_axis_0"), val = int32(-1)];
tensor<fp16, [2, 2, 5, 32]> var_563_cast_fp16_0, tensor<fp16, [2, 2, 5, 32]> var_563_cast_fp16_1 = split(axis = var_563_axis_0, split_sizes = var_563_split_sizes_0, x = k_25_cast_fp16)[name = string("op_563_cast_fp16")];
fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)];
tensor<fp16, [2, 2, 5, 32]> var_565_cast_fp16 = mul(x = var_563_cast_fp16_1, y = const_34_promoted_to_fp16)[name = string("op_565_cast_fp16")];
bool var_567_interleave_0 = const()[name = string("op_567_interleave_0"), val = bool(false)];
tensor<fp16, [2, 2, 5, 64]> var_567_cast_fp16 = concat(axis = var_502, interleave = var_567_interleave_0, values = (var_565_cast_fp16, var_563_cast_fp16_0))[name = string("op_567_cast_fp16")];
tensor<fp16, [2, 2, 5, 64]> var_568_cast_fp16 = mul(x = var_567_cast_fp16, y = sin_to_fp16)[name = string("op_568_cast_fp16")];
tensor<fp16, [2, 2, 5, 64]> k_embed_cast_fp16 = add(x = var_562_cast_fp16, y = var_568_cast_fp16)[name = string("k_embed_cast_fp16")];
tensor<int32, [2]> var_575_split_sizes_0 = const()[name = string("op_575_split_sizes_0"), val = tensor<int32, [2]>([8, 8])];
int32 var_575_axis_0 = const()[name = string("op_575_axis_0"), val = int32(1)];
tensor<fp16, [2, 8, 5, 64]> var_575_cast_fp16_0, tensor<fp16, [2, 8, 5, 64]> var_575_cast_fp16_1 = split(axis = var_575_axis_0, split_sizes = var_575_split_sizes_0, x = q_embed_cast_fp16)[name = string("op_575_cast_fp16")];
tensor<int32, [2]> var_577_split_sizes_0 = const()[name = string("op_577_split_sizes_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_577_axis_0 = const()[name = string("op_577_axis_0"), val = int32(1)];
tensor<fp16, [2, 1, 5, 64]> var_577_cast_fp16_0, tensor<fp16, [2, 1, 5, 64]> var_577_cast_fp16_1 = split(axis = var_577_axis_0, split_sizes = var_577_split_sizes_0, x = k_embed_cast_fp16)[name = string("op_577_cast_fp16")];
tensor<int32, [2]> var_579_split_sizes_0 = const()[name = string("op_579_split_sizes_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_579_axis_0 = const()[name = string("op_579_axis_0"), val = int32(1)];
tensor<fp16, [2, 2, 5, 64]> value_states_21_cast_fp16 = transpose(perm = value_states_21_perm_0, x = var_550_cast_fp16)[name = string("transpose_2")];
tensor<fp16, [2, 1, 5, 64]> var_579_cast_fp16_0, tensor<fp16, [2, 1, 5, 64]> var_579_cast_fp16_1 = split(axis = var_579_axis_0, split_sizes = var_579_split_sizes_0, x = value_states_21_cast_fp16)[name = string("op_579_cast_fp16")];
tensor<int32, [4]> var_577_0_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_577_0_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
tensor<fp16, [2, 8, 5, 64]> var_577_0_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_577_0_broadcast_to_same_batch_dims_reps_0, x = var_577_cast_fp16_0)[name = string("op_577_0_broadcast_to_same_batch_dims_cast_fp16")];
tensor<int32, [4]> var_579_0_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_579_0_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
tensor<fp16, [2, 8, 5, 64]> var_579_0_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_579_0_broadcast_to_same_batch_dims_reps_0, x = var_579_cast_fp16_0)[name = string("op_579_0_broadcast_to_same_batch_dims_cast_fp16")];
tensor<fp16, [2, 8, 5, 64]> var_581_cast_fp16 = scaled_dot_product_attention(key = var_577_0_broadcast_to_same_batch_dims_cast_fp16, query = var_575_cast_fp16_0, value = var_579_0_broadcast_to_same_batch_dims_cast_fp16)[name = string("op_581_cast_fp16")];
tensor<int32, [4]> var_577_1_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_577_1_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
tensor<fp16, [2, 8, 5, 64]> var_577_1_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_577_1_broadcast_to_same_batch_dims_reps_0, x = var_577_cast_fp16_1)[name = string("op_577_1_broadcast_to_same_batch_dims_cast_fp16")];
tensor<int32, [4]> var_579_1_broadcast_to_same_batch_dims_reps_0 = const()[name = string("op_579_1_broadcast_to_same_batch_dims_reps_0"), val = tensor<int32, [4]>([1, 8, 1, 1])];
tensor<fp16, [2, 8, 5, 64]> var_579_1_broadcast_to_same_batch_dims_cast_fp16 = tile(reps = var_579_1_broadcast_to_same_batch_dims_reps_0, x = var_579_cast_fp16_1)[name = string("op_579_1_broadcast_to_same_batch_dims_cast_fp16")];
tensor<fp16, [2, 8, 5, 64]> attn_output_19_cast_fp16 = scaled_dot_product_attention(key = var_577_1_broadcast_to_same_batch_dims_cast_fp16, query = var_575_cast_fp16_1, value = var_579_1_broadcast_to_same_batch_dims_cast_fp16)[name = string("attn_output_19_cast_fp16")];
bool attn_output_21_interleave_0 = const()[name = string("attn_output_21_interleave_0"), val = bool(false)];
tensor<fp16, [2, 16, 5, 64]> attn_output_21_cast_fp16 = concat(axis = var_497, interleave = attn_output_21_interleave_0, values = (var_581_cast_fp16, attn_output_19_cast_fp16))[name = string("attn_output_21_cast_fp16")];
tensor<int32, [4]> var_585_perm_0 = const()[name = string("op_585_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_587 = const()[name = string("op_587"), val = tensor<int32, [3]>([2, 5, 1024])];
tensor<fp16, [2, 5, 16, 64]> var_585_cast_fp16 = transpose(perm = var_585_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_1")];
tensor<fp16, [2, 5, 1024]> input_41_cast_fp16 = reshape(shape = var_587, x = var_585_cast_fp16)[name = string("input_41_cast_fp16")];
tensor<fp16, [1024, 1024]> layer_decoder_layers_3_self_attn_layer_o_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_3_self_attn_layer_o_proj_weight_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100982400)))];
tensor<fp16, [2, 5, 1024]> linear_30_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_3_self_attn_layer_o_proj_weight_to_fp16, x = input_41_cast_fp16)[name = string("linear_30_cast_fp16")];
tensor<fp16, [2, 5, 1024]> hidden_states_63_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = linear_30_cast_fp16)[name = string("hidden_states_63_cast_fp16")];
fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)];
tensor<fp16, [2, 5, 1024]> var_594_cast_fp16 = mul(x = hidden_states_63_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_594_cast_fp16")];
bool hidden_states_65_interleave_0 = const()[name = string("hidden_states_65_interleave_0"), val = bool(false)];
tensor<fp16, [2, 5, 2048]> hidden_states_65_cast_fp16 = concat(axis = var_502, interleave = hidden_states_65_interleave_0, values = (hidden_states_63_cast_fp16, var_594_cast_fp16))[name = string("hidden_states_65_cast_fp16")];
tensor<int32, [1]> hidden_states_67_axes_0 = const()[name = string("hidden_states_67_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> weight_15_to_fp16 = const()[name = string("weight_15_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103079616)))];
tensor<fp16, [2, 5, 2048]> hidden_states_67_cast_fp16 = layer_norm(axes = hidden_states_67_axes_0, epsilon = var_505_to_fp16, gamma = weight_15_to_fp16, x = hidden_states_65_cast_fp16)[name = string("hidden_states_67_cast_fp16")];
tensor<int32, [2]> var_602_split_sizes_0 = const()[name = string("op_602_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
int32 var_602_axis_0 = const()[name = string("op_602_axis_0"), val = int32(-1)];
tensor<fp16, [2, 5, 1024]> var_602_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_602_cast_fp16_1 = split(axis = var_602_axis_0, split_sizes = var_602_split_sizes_0, x = hidden_states_67_cast_fp16)[name = string("op_602_cast_fp16")];
tensor<fp16, [4096, 1024]> layer_decoder_layers_3_mlp_gate_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_3_mlp_gate_proj_weight_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103083776)))];
tensor<fp16, [2, 5, 4096]> linear_31_cast_fp16 = linear(bias = linear_10_bias_0_to_fp16, weight = layer_decoder_layers_3_mlp_gate_proj_weight_to_fp16, x = var_602_cast_fp16_0)[name = string("linear_31_cast_fp16")];
tensor<fp16, [2, 5, 4096]> var_610_cast_fp16 = silu(x = linear_31_cast_fp16)[name = string("op_610_cast_fp16")];
tensor<fp16, [4096, 1024]> layer_decoder_layers_3_mlp_up_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_3_mlp_up_proj_weight_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111472448)))];
tensor<fp16, [2, 5, 4096]> linear_32_cast_fp16 = linear(bias = linear_10_bias_0_to_fp16, weight = layer_decoder_layers_3_mlp_up_proj_weight_to_fp16, x = var_602_cast_fp16_0)[name = string("linear_32_cast_fp16")];
tensor<fp16, [2, 5, 4096]> input_47_cast_fp16 = mul(x = var_610_cast_fp16, y = linear_32_cast_fp16)[name = string("input_47_cast_fp16")];
tensor<fp16, [1024, 4096]> layer_decoder_layers_3_mlp_down_proj_weight_to_fp16 = const()[name = string("layer_decoder_layers_3_mlp_down_proj_weight_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119861120)))];
tensor<fp16, [2, 5, 1024]> linear_33_cast_fp16 = linear(bias = linear_6_bias_0_to_fp16, weight = layer_decoder_layers_3_mlp_down_proj_weight_to_fp16, x = input_47_cast_fp16)[name = string("linear_33_cast_fp16")];
tensor<fp16, [2, 5, 1024]> hidden_states_71_cast_fp16 = add(x = hidden_states_63_cast_fp16, y = linear_33_cast_fp16)[name = string("hidden_states_71_cast_fp16")];
int32 var_623 = const()[name = string("op_623"), val = int32(-1)];
fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)];
tensor<fp16, [2, 5, 1024]> var_626_cast_fp16 = mul(x = hidden_states_71_cast_fp16, y = const_37_promoted_to_fp16)[name = string("op_626_cast_fp16")];
bool hidden_states_73_interleave_0 = const()[name = string("hidden_states_73_interleave_0"), val = bool(false)];
tensor<fp16, [2, 5, 2048]> hidden_states_73_cast_fp16 = concat(axis = var_623, interleave = hidden_states_73_interleave_0, values = (hidden_states_71_cast_fp16, var_626_cast_fp16))[name = string("hidden_states_73_cast_fp16")];
tensor<int32, [1]> hidden_states_75_axes_0 = const()[name = string("hidden_states_75_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> weight_17_to_fp16 = const()[name = string("weight_17_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128249792)))];
fp16 var_620_to_fp16 = const()[name = string("op_620_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [2, 5, 2048]> hidden_states_75_cast_fp16 = layer_norm(axes = hidden_states_75_axes_0, epsilon = var_620_to_fp16, gamma = weight_17_to_fp16, x = hidden_states_73_cast_fp16)[name = string("hidden_states_75_cast_fp16")];
tensor<int32, [2]> var_634_split_sizes_0 = const()[name = string("op_634_split_sizes_0"), val = tensor<int32, [2]>([1024, 1024])];
int32 var_634_axis_0 = const()[name = string("op_634_axis_0"), val = int32(-1)];
tensor<fp16, [2, 5, 1024]> var_634_cast_fp16_0, tensor<fp16, [2, 5, 1024]> var_634_cast_fp16_1 = split(axis = var_634_axis_0, split_sizes = var_634_split_sizes_0, x = hidden_states_75_cast_fp16)[name = string("op_634_cast_fp16")];
tensor<int32, [3]> var_652_begin_0 = const()[name = string("op_652_begin_0"), val = tensor<int32, [3]>([0, 3, 0])];
tensor<int32, [3]> var_652_end_0 = const()[name = string("op_652_end_0"), val = tensor<int32, [3]>([2, 5, 1024])];
tensor<bool, [3]> var_652_end_mask_0 = const()[name = string("op_652_end_mask_0"), val = tensor<bool, [3]>([true, true, true])];
tensor<fp16, [2, 2, 1024]> var_652_cast_fp16 = slice_by_index(begin = var_652_begin_0, end = var_652_end_0, end_mask = var_652_end_mask_0, x = var_634_cast_fp16_0)[name = string("op_652_cast_fp16")];
tensor<fp16, [64, 1024]> layer_out_proj_weight_to_fp16 = const()[name = string("layer_out_proj_weight_to_fp16"), val = tensor<fp16, [64, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128253952)))];
tensor<fp16, [64]> layer_out_proj_bias_to_fp16 = const()[name = string("layer_out_proj_bias_to_fp16"), val = tensor<fp16, [64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128385088)))];
tensor<fp16, [2, 2, 64]> linear_34_cast_fp16 = linear(bias = layer_out_proj_bias_to_fp16, weight = layer_out_proj_weight_to_fp16, x = var_652_cast_fp16)[name = string("linear_34_cast_fp16")];
tensor<int32, [3]> var_663_perm_0 = const()[name = string("op_663_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [2, 64, 2]> output = transpose(perm = var_663_perm_0, x = linear_34_cast_fp16)[name = string("transpose_0")];
} -> (output);
}
locdit_f16.mlmodelc/weights/weight.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eb2a8cbcb47f04d55895d79276eeaf6420517a39cd0b1dc7d018de1b06b21c48
size 128385280