Add FSMN-VAD CoreML (preprocessor + FSMN scorer + vad_config + card)
Browse files- FsmnVad.mlmodelc/analytics/coremldata.bin +3 -0
- FsmnVad.mlmodelc/coremldata.bin +3 -0
- FsmnVad.mlmodelc/model.mil +122 -0
- FsmnVad.mlmodelc/weights/weight.bin +3 -0
- FsmnVadPreprocessor.mlmodelc/analytics/coremldata.bin +3 -0
- FsmnVadPreprocessor.mlmodelc/coremldata.bin +3 -0
- FsmnVadPreprocessor.mlmodelc/model.mil +95 -0
- FsmnVadPreprocessor.mlmodelc/weights/weight.bin +3 -0
- README.md +45 -0
- vad_config.json +1 -0
FsmnVad.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43a93af27ff0d3de4350b046af669f1d19991f6ee00ea75dff62e518e5e176a6
|
| 3 |
+
size 243
|
FsmnVad.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:075fba34ad2315b9d7d07b99c0dfca42c14fa604e98f7363fff749fab0f1eba1
|
| 3 |
+
size 315
|
FsmnVad.mlmodelc/model.mil
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
program(1.0)
|
| 2 |
+
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.5.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
|
| 3 |
+
{
|
| 4 |
+
func main<ios17>(tensor<fp32, [1, 3000, 400]> feats) {
|
| 5 |
+
tensor<int32, []> var_3 = const()[name = tensor<string, []>("op_3"), val = tensor<int32, []>(-1)];
|
| 6 |
+
tensor<int32, []> var_12 = const()[name = tensor<string, []>("op_12"), val = tensor<int32, []>(2)];
|
| 7 |
+
tensor<string, []> feats_to_fp16_dtype_0 = const()[name = tensor<string, []>("feats_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
|
| 8 |
+
tensor<fp16, [140, 400]> net_in_linear1_linear_weight_to_fp16 = const()[name = tensor<string, []>("net_in_linear1_linear_weight_to_fp16"), val = tensor<fp16, [140, 400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
|
| 9 |
+
tensor<fp16, [140]> net_in_linear1_linear_bias_to_fp16 = const()[name = tensor<string, []>("net_in_linear1_linear_bias_to_fp16"), val = tensor<fp16, [140]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(112128)))];
|
| 10 |
+
tensor<fp16, [1, 3000, 400]> feats_to_fp16 = cast(dtype = feats_to_fp16_dtype_0, x = feats)[name = tensor<string, []>("cast_1")];
|
| 11 |
+
tensor<fp16, [1, 3000, 140]> linear_0_cast_fp16 = linear(bias = net_in_linear1_linear_bias_to_fp16, weight = net_in_linear1_linear_weight_to_fp16, x = feats_to_fp16)[name = tensor<string, []>("linear_0_cast_fp16")];
|
| 12 |
+
tensor<fp16, [250, 140]> net_in_linear2_linear_weight_to_fp16 = const()[name = tensor<string, []>("net_in_linear2_linear_weight_to_fp16"), val = tensor<fp16, [250, 140]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(112512)))];
|
| 13 |
+
tensor<fp16, [250]> net_in_linear2_linear_bias_to_fp16 = const()[name = tensor<string, []>("net_in_linear2_linear_bias_to_fp16"), val = tensor<fp16, [250]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(182592)))];
|
| 14 |
+
tensor<fp16, [1, 3000, 250]> linear_1_cast_fp16 = linear(bias = net_in_linear2_linear_bias_to_fp16, weight = net_in_linear2_linear_weight_to_fp16, x = linear_0_cast_fp16)[name = tensor<string, []>("linear_1_cast_fp16")];
|
| 15 |
+
tensor<fp16, [1, 3000, 250]> input_5_cast_fp16 = relu(x = linear_1_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
|
| 16 |
+
tensor<fp16, [128, 250]> net_fsmn_0_linear_linear_weight_to_fp16 = const()[name = tensor<string, []>("net_fsmn_0_linear_linear_weight_to_fp16"), val = tensor<fp16, [128, 250]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(183168)))];
|
| 17 |
+
tensor<fp16, [128]> linear_2_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_2_bias_0_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(247232)))];
|
| 18 |
+
tensor<fp16, [1, 3000, 128]> linear_2_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = net_fsmn_0_linear_linear_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("linear_2_cast_fp16")];
|
| 19 |
+
tensor<int32, [1]> x_1_axes_0 = const()[name = tensor<string, []>("x_1_axes_0"), val = tensor<int32, [1]>([1])];
|
| 20 |
+
tensor<fp16, [1, 1, 3000, 128]> x_1_cast_fp16 = expand_dims(axes = x_1_axes_0, x = linear_2_cast_fp16)[name = tensor<string, []>("x_1_cast_fp16")];
|
| 21 |
+
tensor<int32, [4]> var_45 = const()[name = tensor<string, []>("op_45"), val = tensor<int32, [4]>([0, 3, 2, 1])];
|
| 22 |
+
tensor<bool, []> y_left_1_interleave_0 = const()[name = tensor<string, []>("y_left_1_interleave_0"), val = tensor<bool, []>(false)];
|
| 23 |
+
tensor<fp16, [1, 128, 19, 1]> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, [1, 128, 19, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(247552)))];
|
| 24 |
+
tensor<fp16, [1, 128, 3000, 1]> x_per_1_cast_fp16 = transpose(perm = var_45, x = x_1_cast_fp16)[name = tensor<string, []>("transpose_7")];
|
| 25 |
+
tensor<fp16, [1, 128, 3019, 1]> y_left_1_cast_fp16 = concat(axis = var_12, interleave = y_left_1_interleave_0, values = (const_2_to_fp16, x_per_1_cast_fp16))[name = tensor<string, []>("y_left_1_cast_fp16")];
|
| 26 |
+
tensor<string, []> y_left_3_pad_type_0 = const()[name = tensor<string, []>("y_left_3_pad_type_0"), val = tensor<string, []>("valid")];
|
| 27 |
+
tensor<int32, []> y_left_3_groups_0 = const()[name = tensor<string, []>("y_left_3_groups_0"), val = tensor<int32, []>(128)];
|
| 28 |
+
tensor<int32, [2]> y_left_3_strides_0 = const()[name = tensor<string, []>("y_left_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
|
| 29 |
+
tensor<int32, [4]> y_left_3_pad_0 = const()[name = tensor<string, []>("y_left_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
|
| 30 |
+
tensor<int32, [2]> y_left_3_dilations_0 = const()[name = tensor<string, []>("y_left_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
|
| 31 |
+
tensor<fp16, [128, 1, 20, 1]> net_fsmn_0_fsmn_block_conv_left_weight_to_fp16 = const()[name = tensor<string, []>("net_fsmn_0_fsmn_block_conv_left_weight_to_fp16"), val = tensor<fp16, [128, 1, 20, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(252480)))];
|
| 32 |
+
tensor<fp16, [1, 128, 3000, 1]> y_left_3_cast_fp16 = conv(dilations = y_left_3_dilations_0, groups = y_left_3_groups_0, pad = y_left_3_pad_0, pad_type = y_left_3_pad_type_0, strides = y_left_3_strides_0, weight = net_fsmn_0_fsmn_block_conv_left_weight_to_fp16, x = y_left_1_cast_fp16)[name = tensor<string, []>("y_left_3_cast_fp16")];
|
| 33 |
+
tensor<fp16, [1, 128, 3000, 1]> out_1_cast_fp16 = add(x = x_per_1_cast_fp16, y = y_left_3_cast_fp16)[name = tensor<string, []>("out_1_cast_fp16")];
|
| 34 |
+
tensor<int32, [4]> var_57 = const()[name = tensor<string, []>("op_57"), val = tensor<int32, [4]>([0, 3, 2, 1])];
|
| 35 |
+
tensor<int32, [1]> input_7_axes_0 = const()[name = tensor<string, []>("input_7_axes_0"), val = tensor<int32, [1]>([1])];
|
| 36 |
+
tensor<fp16, [1, 1, 3000, 128]> out_per_1_cast_fp16 = transpose(perm = var_57, x = out_1_cast_fp16)[name = tensor<string, []>("transpose_6")];
|
| 37 |
+
tensor<fp16, [1, 3000, 128]> input_7_cast_fp16 = squeeze(axes = input_7_axes_0, x = out_per_1_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
|
| 38 |
+
tensor<fp16, [250, 128]> net_fsmn_0_affine_linear_weight_to_fp16 = const()[name = tensor<string, []>("net_fsmn_0_affine_linear_weight_to_fp16"), val = tensor<fp16, [250, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(257664)))];
|
| 39 |
+
tensor<fp16, [250]> net_fsmn_0_affine_linear_bias_to_fp16 = const()[name = tensor<string, []>("net_fsmn_0_affine_linear_bias_to_fp16"), val = tensor<fp16, [250]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(321728)))];
|
| 40 |
+
tensor<fp16, [1, 3000, 250]> linear_3_cast_fp16 = linear(bias = net_fsmn_0_affine_linear_bias_to_fp16, weight = net_fsmn_0_affine_linear_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("linear_3_cast_fp16")];
|
| 41 |
+
tensor<fp16, [1, 3000, 250]> input_11_cast_fp16 = relu(x = linear_3_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
|
| 42 |
+
tensor<fp16, [128, 250]> net_fsmn_1_linear_linear_weight_to_fp16 = const()[name = tensor<string, []>("net_fsmn_1_linear_linear_weight_to_fp16"), val = tensor<fp16, [128, 250]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(322304)))];
|
| 43 |
+
tensor<fp16, [1, 3000, 128]> linear_4_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = net_fsmn_1_linear_linear_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("linear_4_cast_fp16")];
|
| 44 |
+
tensor<int32, [1]> x_3_axes_0 = const()[name = tensor<string, []>("x_3_axes_0"), val = tensor<int32, [1]>([1])];
|
| 45 |
+
tensor<fp16, [1, 1, 3000, 128]> x_3_cast_fp16 = expand_dims(axes = x_3_axes_0, x = linear_4_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
|
| 46 |
+
tensor<int32, [4]> var_77 = const()[name = tensor<string, []>("op_77"), val = tensor<int32, [4]>([0, 3, 2, 1])];
|
| 47 |
+
tensor<bool, []> y_left_5_interleave_0 = const()[name = tensor<string, []>("y_left_5_interleave_0"), val = tensor<bool, []>(false)];
|
| 48 |
+
tensor<fp16, [1, 128, 3000, 1]> x_per_3_cast_fp16 = transpose(perm = var_77, x = x_3_cast_fp16)[name = tensor<string, []>("transpose_5")];
|
| 49 |
+
tensor<fp16, [1, 128, 3019, 1]> y_left_5_cast_fp16 = concat(axis = var_12, interleave = y_left_5_interleave_0, values = (const_2_to_fp16, x_per_3_cast_fp16))[name = tensor<string, []>("y_left_5_cast_fp16")];
|
| 50 |
+
tensor<string, []> y_left_7_pad_type_0 = const()[name = tensor<string, []>("y_left_7_pad_type_0"), val = tensor<string, []>("valid")];
|
| 51 |
+
tensor<int32, []> y_left_7_groups_0 = const()[name = tensor<string, []>("y_left_7_groups_0"), val = tensor<int32, []>(128)];
|
| 52 |
+
tensor<int32, [2]> y_left_7_strides_0 = const()[name = tensor<string, []>("y_left_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
|
| 53 |
+
tensor<int32, [4]> y_left_7_pad_0 = const()[name = tensor<string, []>("y_left_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
|
| 54 |
+
tensor<int32, [2]> y_left_7_dilations_0 = const()[name = tensor<string, []>("y_left_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
|
| 55 |
+
tensor<fp16, [128, 1, 20, 1]> net_fsmn_1_fsmn_block_conv_left_weight_to_fp16 = const()[name = tensor<string, []>("net_fsmn_1_fsmn_block_conv_left_weight_to_fp16"), val = tensor<fp16, [128, 1, 20, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(386368)))];
|
| 56 |
+
tensor<fp16, [1, 128, 3000, 1]> y_left_7_cast_fp16 = conv(dilations = y_left_7_dilations_0, groups = y_left_7_groups_0, pad = y_left_7_pad_0, pad_type = y_left_7_pad_type_0, strides = y_left_7_strides_0, weight = net_fsmn_1_fsmn_block_conv_left_weight_to_fp16, x = y_left_5_cast_fp16)[name = tensor<string, []>("y_left_7_cast_fp16")];
|
| 57 |
+
tensor<fp16, [1, 128, 3000, 1]> out_3_cast_fp16 = add(x = x_per_3_cast_fp16, y = y_left_7_cast_fp16)[name = tensor<string, []>("out_3_cast_fp16")];
|
| 58 |
+
tensor<int32, [4]> var_89 = const()[name = tensor<string, []>("op_89"), val = tensor<int32, [4]>([0, 3, 2, 1])];
|
| 59 |
+
tensor<int32, [1]> input_13_axes_0 = const()[name = tensor<string, []>("input_13_axes_0"), val = tensor<int32, [1]>([1])];
|
| 60 |
+
tensor<fp16, [1, 1, 3000, 128]> out_per_3_cast_fp16 = transpose(perm = var_89, x = out_3_cast_fp16)[name = tensor<string, []>("transpose_4")];
|
| 61 |
+
tensor<fp16, [1, 3000, 128]> input_13_cast_fp16 = squeeze(axes = input_13_axes_0, x = out_per_3_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
|
| 62 |
+
tensor<fp16, [250, 128]> net_fsmn_1_affine_linear_weight_to_fp16 = const()[name = tensor<string, []>("net_fsmn_1_affine_linear_weight_to_fp16"), val = tensor<fp16, [250, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(391552)))];
|
| 63 |
+
tensor<fp16, [250]> net_fsmn_1_affine_linear_bias_to_fp16 = const()[name = tensor<string, []>("net_fsmn_1_affine_linear_bias_to_fp16"), val = tensor<fp16, [250]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(455616)))];
|
| 64 |
+
tensor<fp16, [1, 3000, 250]> linear_5_cast_fp16 = linear(bias = net_fsmn_1_affine_linear_bias_to_fp16, weight = net_fsmn_1_affine_linear_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("linear_5_cast_fp16")];
|
| 65 |
+
tensor<fp16, [1, 3000, 250]> input_17_cast_fp16 = relu(x = linear_5_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
|
| 66 |
+
tensor<fp16, [128, 250]> net_fsmn_2_linear_linear_weight_to_fp16 = const()[name = tensor<string, []>("net_fsmn_2_linear_linear_weight_to_fp16"), val = tensor<fp16, [128, 250]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(456192)))];
|
| 67 |
+
tensor<fp16, [1, 3000, 128]> linear_6_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = net_fsmn_2_linear_linear_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("linear_6_cast_fp16")];
|
| 68 |
+
tensor<int32, [1]> x_5_axes_0 = const()[name = tensor<string, []>("x_5_axes_0"), val = tensor<int32, [1]>([1])];
|
| 69 |
+
tensor<fp16, [1, 1, 3000, 128]> x_5_cast_fp16 = expand_dims(axes = x_5_axes_0, x = linear_6_cast_fp16)[name = tensor<string, []>("x_5_cast_fp16")];
|
| 70 |
+
tensor<int32, [4]> var_109 = const()[name = tensor<string, []>("op_109"), val = tensor<int32, [4]>([0, 3, 2, 1])];
|
| 71 |
+
tensor<bool, []> y_left_9_interleave_0 = const()[name = tensor<string, []>("y_left_9_interleave_0"), val = tensor<bool, []>(false)];
|
| 72 |
+
tensor<fp16, [1, 128, 3000, 1]> x_per_5_cast_fp16 = transpose(perm = var_109, x = x_5_cast_fp16)[name = tensor<string, []>("transpose_3")];
|
| 73 |
+
tensor<fp16, [1, 128, 3019, 1]> y_left_9_cast_fp16 = concat(axis = var_12, interleave = y_left_9_interleave_0, values = (const_2_to_fp16, x_per_5_cast_fp16))[name = tensor<string, []>("y_left_9_cast_fp16")];
|
| 74 |
+
tensor<string, []> y_left_11_pad_type_0 = const()[name = tensor<string, []>("y_left_11_pad_type_0"), val = tensor<string, []>("valid")];
|
| 75 |
+
tensor<int32, []> y_left_11_groups_0 = const()[name = tensor<string, []>("y_left_11_groups_0"), val = tensor<int32, []>(128)];
|
| 76 |
+
tensor<int32, [2]> y_left_11_strides_0 = const()[name = tensor<string, []>("y_left_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
|
| 77 |
+
tensor<int32, [4]> y_left_11_pad_0 = const()[name = tensor<string, []>("y_left_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
|
| 78 |
+
tensor<int32, [2]> y_left_11_dilations_0 = const()[name = tensor<string, []>("y_left_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
|
| 79 |
+
tensor<fp16, [128, 1, 20, 1]> net_fsmn_2_fsmn_block_conv_left_weight_to_fp16 = const()[name = tensor<string, []>("net_fsmn_2_fsmn_block_conv_left_weight_to_fp16"), val = tensor<fp16, [128, 1, 20, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(520256)))];
|
| 80 |
+
tensor<fp16, [1, 128, 3000, 1]> y_left_11_cast_fp16 = conv(dilations = y_left_11_dilations_0, groups = y_left_11_groups_0, pad = y_left_11_pad_0, pad_type = y_left_11_pad_type_0, strides = y_left_11_strides_0, weight = net_fsmn_2_fsmn_block_conv_left_weight_to_fp16, x = y_left_9_cast_fp16)[name = tensor<string, []>("y_left_11_cast_fp16")];
|
| 81 |
+
tensor<fp16, [1, 128, 3000, 1]> out_5_cast_fp16 = add(x = x_per_5_cast_fp16, y = y_left_11_cast_fp16)[name = tensor<string, []>("out_5_cast_fp16")];
|
| 82 |
+
tensor<int32, [4]> var_121 = const()[name = tensor<string, []>("op_121"), val = tensor<int32, [4]>([0, 3, 2, 1])];
|
| 83 |
+
tensor<int32, [1]> input_19_axes_0 = const()[name = tensor<string, []>("input_19_axes_0"), val = tensor<int32, [1]>([1])];
|
| 84 |
+
tensor<fp16, [1, 1, 3000, 128]> out_per_5_cast_fp16 = transpose(perm = var_121, x = out_5_cast_fp16)[name = tensor<string, []>("transpose_2")];
|
| 85 |
+
tensor<fp16, [1, 3000, 128]> input_19_cast_fp16 = squeeze(axes = input_19_axes_0, x = out_per_5_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
|
| 86 |
+
tensor<fp16, [250, 128]> net_fsmn_2_affine_linear_weight_to_fp16 = const()[name = tensor<string, []>("net_fsmn_2_affine_linear_weight_to_fp16"), val = tensor<fp16, [250, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(525440)))];
|
| 87 |
+
tensor<fp16, [250]> net_fsmn_2_affine_linear_bias_to_fp16 = const()[name = tensor<string, []>("net_fsmn_2_affine_linear_bias_to_fp16"), val = tensor<fp16, [250]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(589504)))];
|
| 88 |
+
tensor<fp16, [1, 3000, 250]> linear_7_cast_fp16 = linear(bias = net_fsmn_2_affine_linear_bias_to_fp16, weight = net_fsmn_2_affine_linear_weight_to_fp16, x = input_19_cast_fp16)[name = tensor<string, []>("linear_7_cast_fp16")];
|
| 89 |
+
tensor<fp16, [1, 3000, 250]> input_23_cast_fp16 = relu(x = linear_7_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
|
| 90 |
+
tensor<fp16, [128, 250]> net_fsmn_3_linear_linear_weight_to_fp16 = const()[name = tensor<string, []>("net_fsmn_3_linear_linear_weight_to_fp16"), val = tensor<fp16, [128, 250]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(590080)))];
|
| 91 |
+
tensor<fp16, [1, 3000, 128]> linear_8_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = net_fsmn_3_linear_linear_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("linear_8_cast_fp16")];
|
| 92 |
+
tensor<int32, [1]> x_axes_0 = const()[name = tensor<string, []>("x_axes_0"), val = tensor<int32, [1]>([1])];
|
| 93 |
+
tensor<fp16, [1, 1, 3000, 128]> x_cast_fp16 = expand_dims(axes = x_axes_0, x = linear_8_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
|
| 94 |
+
tensor<int32, [4]> var_141 = const()[name = tensor<string, []>("op_141"), val = tensor<int32, [4]>([0, 3, 2, 1])];
|
| 95 |
+
tensor<bool, []> y_left_13_interleave_0 = const()[name = tensor<string, []>("y_left_13_interleave_0"), val = tensor<bool, []>(false)];
|
| 96 |
+
tensor<fp16, [1, 128, 3000, 1]> x_per_cast_fp16 = transpose(perm = var_141, x = x_cast_fp16)[name = tensor<string, []>("transpose_1")];
|
| 97 |
+
tensor<fp16, [1, 128, 3019, 1]> y_left_13_cast_fp16 = concat(axis = var_12, interleave = y_left_13_interleave_0, values = (const_2_to_fp16, x_per_cast_fp16))[name = tensor<string, []>("y_left_13_cast_fp16")];
|
| 98 |
+
tensor<string, []> y_left_pad_type_0 = const()[name = tensor<string, []>("y_left_pad_type_0"), val = tensor<string, []>("valid")];
|
| 99 |
+
tensor<int32, []> y_left_groups_0 = const()[name = tensor<string, []>("y_left_groups_0"), val = tensor<int32, []>(128)];
|
| 100 |
+
tensor<int32, [2]> y_left_strides_0 = const()[name = tensor<string, []>("y_left_strides_0"), val = tensor<int32, [2]>([1, 1])];
|
| 101 |
+
tensor<int32, [4]> y_left_pad_0 = const()[name = tensor<string, []>("y_left_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
|
| 102 |
+
tensor<int32, [2]> y_left_dilations_0 = const()[name = tensor<string, []>("y_left_dilations_0"), val = tensor<int32, [2]>([1, 1])];
|
| 103 |
+
tensor<fp16, [128, 1, 20, 1]> net_fsmn_3_fsmn_block_conv_left_weight_to_fp16 = const()[name = tensor<string, []>("net_fsmn_3_fsmn_block_conv_left_weight_to_fp16"), val = tensor<fp16, [128, 1, 20, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(654144)))];
|
| 104 |
+
tensor<fp16, [1, 128, 3000, 1]> y_left_cast_fp16 = conv(dilations = y_left_dilations_0, groups = y_left_groups_0, pad = y_left_pad_0, pad_type = y_left_pad_type_0, strides = y_left_strides_0, weight = net_fsmn_3_fsmn_block_conv_left_weight_to_fp16, x = y_left_13_cast_fp16)[name = tensor<string, []>("y_left_cast_fp16")];
|
| 105 |
+
tensor<fp16, [1, 128, 3000, 1]> out_cast_fp16 = add(x = x_per_cast_fp16, y = y_left_cast_fp16)[name = tensor<string, []>("out_cast_fp16")];
|
| 106 |
+
tensor<int32, [4]> var_153 = const()[name = tensor<string, []>("op_153"), val = tensor<int32, [4]>([0, 3, 2, 1])];
|
| 107 |
+
tensor<int32, [1]> input_25_axes_0 = const()[name = tensor<string, []>("input_25_axes_0"), val = tensor<int32, [1]>([1])];
|
| 108 |
+
tensor<fp16, [1, 1, 3000, 128]> out_per_cast_fp16 = transpose(perm = var_153, x = out_cast_fp16)[name = tensor<string, []>("transpose_0")];
|
| 109 |
+
tensor<fp16, [1, 3000, 128]> input_25_cast_fp16 = squeeze(axes = input_25_axes_0, x = out_per_cast_fp16)[name = tensor<string, []>("input_25_cast_fp16")];
|
| 110 |
+
tensor<fp16, [250, 128]> net_fsmn_3_affine_linear_weight_to_fp16 = const()[name = tensor<string, []>("net_fsmn_3_affine_linear_weight_to_fp16"), val = tensor<fp16, [250, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(659328)))];
|
| 111 |
+
tensor<fp16, [250]> net_fsmn_3_affine_linear_bias_to_fp16 = const()[name = tensor<string, []>("net_fsmn_3_affine_linear_bias_to_fp16"), val = tensor<fp16, [250]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(723392)))];
|
| 112 |
+
tensor<fp16, [1, 3000, 250]> linear_9_cast_fp16 = linear(bias = net_fsmn_3_affine_linear_bias_to_fp16, weight = net_fsmn_3_affine_linear_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("linear_9_cast_fp16")];
|
| 113 |
+
tensor<fp16, [1, 3000, 250]> input_29_cast_fp16 = relu(x = linear_9_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
|
| 114 |
+
tensor<fp16, [140, 250]> net_out_linear1_linear_weight_to_fp16 = const()[name = tensor<string, []>("net_out_linear1_linear_weight_to_fp16"), val = tensor<fp16, [140, 250]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(723968)))];
|
| 115 |
+
tensor<fp16, [140]> net_out_linear1_linear_bias_to_fp16 = const()[name = tensor<string, []>("net_out_linear1_linear_bias_to_fp16"), val = tensor<fp16, [140]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(794048)))];
|
| 116 |
+
tensor<fp16, [1, 3000, 140]> linear_10_cast_fp16 = linear(bias = net_out_linear1_linear_bias_to_fp16, weight = net_out_linear1_linear_weight_to_fp16, x = input_29_cast_fp16)[name = tensor<string, []>("linear_10_cast_fp16")];
|
| 117 |
+
tensor<fp16, [248, 140]> net_out_linear2_linear_weight_to_fp16 = const()[name = tensor<string, []>("net_out_linear2_linear_weight_to_fp16"), val = tensor<fp16, [248, 140]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(794432)))];
|
| 118 |
+
tensor<fp16, [248]> net_out_linear2_linear_bias_to_fp16 = const()[name = tensor<string, []>("net_out_linear2_linear_bias_to_fp16"), val = tensor<fp16, [248]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(863936)))];
|
| 119 |
+
tensor<fp16, [1, 3000, 248]> linear_11_cast_fp16 = linear(bias = net_out_linear2_linear_bias_to_fp16, weight = net_out_linear2_linear_weight_to_fp16, x = linear_10_cast_fp16)[name = tensor<string, []>("linear_11_cast_fp16")];
|
| 120 |
+
tensor<fp16, [1, 3000, 248]> scores = softmax(axis = var_3, x = linear_11_cast_fp16)[name = tensor<string, []>("op_169_cast_fp16")];
|
| 121 |
+
} -> (scores);
|
| 122 |
+
}
|
FsmnVad.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b115e3a6ff7b89778fe9c2cd6d65f739ab64438b8ce1eb18de056707898b3ec
|
| 3 |
+
size 864496
|
FsmnVadPreprocessor.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc1415dd703db292d58b11d8835b8cbf106f4d3fb6c6ada2b23cdbd3f65686b0
|
| 3 |
+
size 243
|
FsmnVadPreprocessor.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:000b61a371786be363c00a8fa5542f399cdb32e21c30935d18396ed21c24ec13
|
| 3 |
+
size 331
|
FsmnVadPreprocessor.mlmodelc/model.mil
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
program(1.0)
|
| 2 |
+
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.5.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
|
| 3 |
+
{
|
| 4 |
+
func main<ios17>(tensor<fp32, [1, ?]> waveform) [FlexibleShapeInformation = tuple<tuple<tensor<string, []>, dict<tensor<string, []>, tensor<int32, [?]>>>, tuple<tensor<string, []>, dict<tensor<string, []>, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"waveform", [1, 160000]}}), ("RangeDims", {{"waveform", [[1, 1], [8000, 4800000]]}})))] {
|
| 5 |
+
tensor<fp32, [400]> cmvn_inv_std = const()[name = tensor<string, []>("cmvn_inv_std"), val = tensor<fp32, [400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
|
| 6 |
+
tensor<fp32, [400]> cmvn_neg_mean = const()[name = tensor<string, []>("cmvn_neg_mean"), val = tensor<fp32, [400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1728)))];
|
| 7 |
+
tensor<fp32, [400, 80, 5]> lfr_kernel = const()[name = tensor<string, []>("lfr_kernel"), val = tensor<fp32, [400, 80, 5]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3392)))];
|
| 8 |
+
tensor<fp32, [400]> window = const()[name = tensor<string, []>("window"), val = tensor<fp32, [400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(643456)))];
|
| 9 |
+
tensor<fp32, [400, 1, 400]> frame_kernel = const()[name = tensor<string, []>("frame_kernel"), val = tensor<fp32, [400, 1, 400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(645120)))];
|
| 10 |
+
tensor<int32, [1]> var_11_axes_0 = const()[name = tensor<string, []>("op_11_axes_0"), val = tensor<int32, [1]>([1])];
|
| 11 |
+
tensor<fp32, [1, 1, ?]> var_11 = expand_dims(axes = var_11_axes_0, x = waveform)[name = tensor<string, []>("op_11")];
|
| 12 |
+
tensor<string, []> var_27_pad_type_0 = const()[name = tensor<string, []>("op_27_pad_type_0"), val = tensor<string, []>("valid")];
|
| 13 |
+
tensor<int32, [1]> var_27_strides_0 = const()[name = tensor<string, []>("op_27_strides_0"), val = tensor<int32, [1]>([160])];
|
| 14 |
+
tensor<int32, [2]> var_27_pad_0 = const()[name = tensor<string, []>("op_27_pad_0"), val = tensor<int32, [2]>([0, 0])];
|
| 15 |
+
tensor<int32, [1]> var_27_dilations_0 = const()[name = tensor<string, []>("op_27_dilations_0"), val = tensor<int32, [1]>([1])];
|
| 16 |
+
tensor<int32, []> var_27_groups_0 = const()[name = tensor<string, []>("op_27_groups_0"), val = tensor<int32, []>(1)];
|
| 17 |
+
tensor<fp32, [1, 400, ?]> var_27 = conv(dilations = var_27_dilations_0, groups = var_27_groups_0, pad = var_27_pad_0, pad_type = var_27_pad_type_0, strides = var_27_strides_0, weight = frame_kernel, x = var_11)[name = tensor<string, []>("op_27")];
|
| 18 |
+
tensor<int32, [3]> var_30_begin_0 = const()[name = tensor<string, []>("op_30_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
|
| 19 |
+
tensor<int32, [3]> var_30_end_0 = const()[name = tensor<string, []>("op_30_end_0"), val = tensor<int32, [3]>([1, 400, 0])];
|
| 20 |
+
tensor<bool, [3]> var_30_end_mask_0 = const()[name = tensor<string, []>("op_30_end_mask_0"), val = tensor<bool, [3]>([false, true, true])];
|
| 21 |
+
tensor<bool, [3]> var_30_squeeze_mask_0 = const()[name = tensor<string, []>("op_30_squeeze_mask_0"), val = tensor<bool, [3]>([true, false, false])];
|
| 22 |
+
tensor<fp32, [400, ?]> var_30 = slice_by_index(begin = var_30_begin_0, end = var_30_end_0, end_mask = var_30_end_mask_0, squeeze_mask = var_30_squeeze_mask_0, x = var_27)[name = tensor<string, []>("op_30")];
|
| 23 |
+
tensor<int32, [2]> frames_1_perm_0 = const()[name = tensor<string, []>("frames_1_perm_0"), val = tensor<int32, [2]>([1, 0])];
|
| 24 |
+
tensor<int32, [1]> var_36_axes_0 = const()[name = tensor<string, []>("op_36_axes_0"), val = tensor<int32, [1]>([1])];
|
| 25 |
+
tensor<bool, []> var_36_keep_dims_0 = const()[name = tensor<string, []>("op_36_keep_dims_0"), val = tensor<bool, []>(true)];
|
| 26 |
+
tensor<fp32, [?, 400]> frames_1 = transpose(perm = frames_1_perm_0, x = var_30)[name = tensor<string, []>("transpose_5")];
|
| 27 |
+
tensor<fp32, [?, 1]> var_36 = reduce_mean(axes = var_36_axes_0, keep_dims = var_36_keep_dims_0, x = frames_1)[name = tensor<string, []>("op_36")];
|
| 28 |
+
tensor<fp32, [?, 400]> frames_3 = sub(x = frames_1, y = var_36)[name = tensor<string, []>("frames_3")];
|
| 29 |
+
tensor<int32, [2]> var_48_begin_0 = const()[name = tensor<string, []>("op_48_begin_0"), val = tensor<int32, [2]>([0, 0])];
|
| 30 |
+
tensor<int32, [2]> var_48_end_0 = const()[name = tensor<string, []>("op_48_end_0"), val = tensor<int32, [2]>([0, 1])];
|
| 31 |
+
tensor<bool, [2]> var_48_end_mask_0 = const()[name = tensor<string, []>("op_48_end_mask_0"), val = tensor<bool, [2]>([true, false])];
|
| 32 |
+
tensor<fp32, [?, 1]> var_48 = slice_by_index(begin = var_48_begin_0, end = var_48_end_0, end_mask = var_48_end_mask_0, x = frames_3)[name = tensor<string, []>("op_48")];
|
| 33 |
+
tensor<int32, [2]> var_58_begin_0 = const()[name = tensor<string, []>("op_58_begin_0"), val = tensor<int32, [2]>([0, 0])];
|
| 34 |
+
tensor<int32, [2]> var_58_end_0 = const()[name = tensor<string, []>("op_58_end_0"), val = tensor<int32, [2]>([0, 399])];
|
| 35 |
+
tensor<bool, [2]> var_58_end_mask_0 = const()[name = tensor<string, []>("op_58_end_mask_0"), val = tensor<bool, [2]>([true, false])];
|
| 36 |
+
tensor<fp32, [?, 399]> var_58 = slice_by_index(begin = var_58_begin_0, end = var_58_end_0, end_mask = var_58_end_mask_0, x = frames_3)[name = tensor<string, []>("op_58")];
|
| 37 |
+
tensor<int32, []> var_60 = const()[name = tensor<string, []>("op_60"), val = tensor<int32, []>(1)];
|
| 38 |
+
tensor<bool, []> shifted_interleave_0 = const()[name = tensor<string, []>("shifted_interleave_0"), val = tensor<bool, []>(false)];
|
| 39 |
+
tensor<fp32, [?, 400]> shifted = concat(axis = var_60, interleave = shifted_interleave_0, values = (var_48, var_58))[name = tensor<string, []>("shifted")];
|
| 40 |
+
tensor<fp32, []> var_62 = const()[name = tensor<string, []>("op_62"), val = tensor<fp32, []>(0x1.f0a3d8p-1)];
|
| 41 |
+
tensor<fp32, [?, 400]> var_63 = mul(x = shifted, y = var_62)[name = tensor<string, []>("op_63")];
|
| 42 |
+
tensor<fp32, [?, 400]> frames_5 = sub(x = frames_3, y = var_63)[name = tensor<string, []>("frames_5")];
|
| 43 |
+
tensor<fp32, [?, 400]> input = mul(x = frames_5, y = window)[name = tensor<string, []>("input")];
|
| 44 |
+
tensor<fp32, []> const_0 = const()[name = tensor<string, []>("const_0"), val = tensor<fp32, []>(0x0p+0)];
|
| 45 |
+
tensor<int32, [4]> frames_pad_0 = const()[name = tensor<string, []>("frames_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 112])];
|
| 46 |
+
tensor<string, []> frames_mode_0 = const()[name = tensor<string, []>("frames_mode_0"), val = tensor<string, []>("constant")];
|
| 47 |
+
tensor<fp32, [?, 512]> frames = pad(constant_val = const_0, mode = frames_mode_0, pad = frames_pad_0, x = input)[name = tensor<string, []>("frames")];
|
| 48 |
+
tensor<fp32, [257, 512]> transpose_0 = const()[name = tensor<string, []>("transpose_0"), val = tensor<fp32, [257, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1285184)))];
|
| 49 |
+
tensor<fp32, [257]> re_bias_0 = const()[name = tensor<string, []>("re_bias_0"), val = tensor<fp32, [257]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1811584)))];
|
| 50 |
+
tensor<fp32, [?, 257]> re = linear(bias = re_bias_0, weight = transpose_0, x = frames)[name = tensor<string, []>("re")];
|
| 51 |
+
tensor<fp32, [257, 512]> transpose_1 = const()[name = tensor<string, []>("transpose_1"), val = tensor<fp32, [257, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1812736)))];
|
| 52 |
+
tensor<fp32, [?, 257]> im = linear(bias = re_bias_0, weight = transpose_1, x = frames)[name = tensor<string, []>("im")];
|
| 53 |
+
tensor<fp32, [?, 257]> var_75 = mul(x = re, y = re)[name = tensor<string, []>("op_75")];
|
| 54 |
+
tensor<fp32, [?, 257]> var_76 = mul(x = im, y = im)[name = tensor<string, []>("op_76")];
|
| 55 |
+
tensor<fp32, [?, 257]> power = add(x = var_75, y = var_76)[name = tensor<string, []>("power")];
|
| 56 |
+
tensor<fp32, [80, 257]> transpose_2 = const()[name = tensor<string, []>("transpose_2"), val = tensor<fp32, [80, 257]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2339136)))];
|
| 57 |
+
tensor<fp32, [80]> var_79_bias_0 = const()[name = tensor<string, []>("op_79_bias_0"), val = tensor<fp32, [80]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2421440)))];
|
| 58 |
+
tensor<fp32, [?, 80]> var_79 = linear(bias = var_79_bias_0, weight = transpose_2, x = power)[name = tensor<string, []>("op_79")];
|
| 59 |
+
tensor<fp32, []> var_80 = const()[name = tensor<string, []>("op_80"), val = tensor<fp32, []>(0x1p-23)];
|
| 60 |
+
tensor<fp32, []> const_1 = const()[name = tensor<string, []>("const_1"), val = tensor<fp32, []>(0x1.fffffep+127)];
|
| 61 |
+
tensor<fp32, [?, 80]> clip_0 = clip(alpha = var_80, beta = const_1, x = var_79)[name = tensor<string, []>("clip_0")];
|
| 62 |
+
tensor<fp32, []> fbank_1_epsilon_0 = const()[name = tensor<string, []>("fbank_1_epsilon_0"), val = tensor<fp32, []>(0x1p-149)];
|
| 63 |
+
tensor<fp32, [?, 80]> fbank_1 = log(epsilon = fbank_1_epsilon_0, x = clip_0)[name = tensor<string, []>("fbank_1")];
|
| 64 |
+
tensor<int32, [2]> var_88_begin_0 = const()[name = tensor<string, []>("op_88_begin_0"), val = tensor<int32, [2]>([0, 0])];
|
| 65 |
+
tensor<int32, [2]> var_88_end_0 = const()[name = tensor<string, []>("op_88_end_0"), val = tensor<int32, [2]>([1, 80])];
|
| 66 |
+
tensor<bool, [2]> var_88_end_mask_0 = const()[name = tensor<string, []>("op_88_end_mask_0"), val = tensor<bool, [2]>([false, true])];
|
| 67 |
+
tensor<fp32, [1, 80]> var_88 = slice_by_index(begin = var_88_begin_0, end = var_88_end_0, end_mask = var_88_end_mask_0, x = fbank_1)[name = tensor<string, []>("op_88")];
|
| 68 |
+
tensor<int32, [2]> var_91 = const()[name = tensor<string, []>("op_91"), val = tensor<int32, [2]>([2, 1])];
|
| 69 |
+
tensor<fp32, [2, 80]> var_92 = tile(reps = var_91, x = var_88)[name = tensor<string, []>("op_92")];
|
| 70 |
+
tensor<int32, []> var_94 = const()[name = tensor<string, []>("op_94"), val = tensor<int32, []>(0)];
|
| 71 |
+
tensor<bool, []> fbank_interleave_0 = const()[name = tensor<string, []>("fbank_interleave_0"), val = tensor<bool, []>(false)];
|
| 72 |
+
tensor<fp32, [?, 80]> fbank = concat(axis = var_94, interleave = fbank_interleave_0, values = (var_92, fbank_1))[name = tensor<string, []>("fbank")];
|
| 73 |
+
tensor<int32, [2]> var_96_perm_0 = const()[name = tensor<string, []>("op_96_perm_0"), val = tensor<int32, [2]>([1, 0])];
|
| 74 |
+
tensor<int32, [1]> var_98_axes_0 = const()[name = tensor<string, []>("op_98_axes_0"), val = tensor<int32, [1]>([0])];
|
| 75 |
+
tensor<fp32, [80, ?]> var_96 = transpose(perm = var_96_perm_0, x = fbank)[name = tensor<string, []>("transpose_4")];
|
| 76 |
+
tensor<fp32, [1, 80, ?]> var_98 = expand_dims(axes = var_98_axes_0, x = var_96)[name = tensor<string, []>("op_98")];
|
| 77 |
+
tensor<string, []> var_114_pad_type_0 = const()[name = tensor<string, []>("op_114_pad_type_0"), val = tensor<string, []>("valid")];
|
| 78 |
+
tensor<int32, [1]> var_114_strides_0 = const()[name = tensor<string, []>("op_114_strides_0"), val = tensor<int32, [1]>([1])];
|
| 79 |
+
tensor<int32, [2]> var_114_pad_0 = const()[name = tensor<string, []>("op_114_pad_0"), val = tensor<int32, [2]>([0, 0])];
|
| 80 |
+
tensor<int32, [1]> var_114_dilations_0 = const()[name = tensor<string, []>("op_114_dilations_0"), val = tensor<int32, [1]>([1])];
|
| 81 |
+
tensor<int32, []> var_114_groups_0 = const()[name = tensor<string, []>("op_114_groups_0"), val = tensor<int32, []>(1)];
|
| 82 |
+
tensor<fp32, [1, 400, ?]> var_114 = conv(dilations = var_114_dilations_0, groups = var_114_groups_0, pad = var_114_pad_0, pad_type = var_114_pad_type_0, strides = var_114_strides_0, weight = lfr_kernel, x = var_98)[name = tensor<string, []>("op_114")];
|
| 83 |
+
tensor<int32, [3]> var_117_begin_0 = const()[name = tensor<string, []>("op_117_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
|
| 84 |
+
tensor<int32, [3]> var_117_end_0 = const()[name = tensor<string, []>("op_117_end_0"), val = tensor<int32, [3]>([1, 400, 0])];
|
| 85 |
+
tensor<bool, [3]> var_117_end_mask_0 = const()[name = tensor<string, []>("op_117_end_mask_0"), val = tensor<bool, [3]>([false, true, true])];
|
| 86 |
+
tensor<bool, [3]> var_117_squeeze_mask_0 = const()[name = tensor<string, []>("op_117_squeeze_mask_0"), val = tensor<bool, [3]>([true, false, false])];
|
| 87 |
+
tensor<fp32, [400, ?]> var_117 = slice_by_index(begin = var_117_begin_0, end = var_117_end_0, end_mask = var_117_end_mask_0, squeeze_mask = var_117_squeeze_mask_0, x = var_114)[name = tensor<string, []>("op_117")];
|
| 88 |
+
tensor<int32, [2]> lfr_perm_0 = const()[name = tensor<string, []>("lfr_perm_0"), val = tensor<int32, [2]>([1, 0])];
|
| 89 |
+
tensor<fp32, [?, 400]> lfr = transpose(perm = lfr_perm_0, x = var_117)[name = tensor<string, []>("transpose_3")];
|
| 90 |
+
tensor<fp32, [?, 400]> var_120 = add(x = lfr, y = cmvn_neg_mean)[name = tensor<string, []>("op_120")];
|
| 91 |
+
tensor<fp32, [?, 400]> feats = mul(x = var_120, y = cmvn_inv_std)[name = tensor<string, []>("feats")];
|
| 92 |
+
tensor<int32, [1]> var_123_axes_0 = const()[name = tensor<string, []>("op_123_axes_0"), val = tensor<int32, [1]>([0])];
|
| 93 |
+
tensor<fp32, [1, ?, 400]> features = expand_dims(axes = var_123_axes_0, x = feats)[name = tensor<string, []>("op_123")];
|
| 94 |
+
} -> (features);
|
| 95 |
+
}
|
FsmnVadPreprocessor.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df55956d6e7b7fa79bad191b79aa5074911bae55081d6f7862f6ca06305accdf
|
| 3 |
+
size 2421824
|
README.md
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: other
|
| 3 |
+
license_name: fsmn-vad-upstream
|
| 4 |
+
license_link: https://github.com/modelscope/FunASR
|
| 5 |
+
language: [zh]
|
| 6 |
+
library_name: coreml
|
| 7 |
+
tags: [coreml, ane, voice-activity-detection, fsmn, funasr, fluidaudio]
|
| 8 |
+
pipeline_tag: voice-activity-detection
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# FSMN-VAD — CoreML (Apple Neural Engine)
|
| 12 |
+
|
| 13 |
+
CoreML conversion of FunASR's **FSMN-VAD** (~0.4M params), for on-device voice
|
| 14 |
+
activity detection on Apple Silicon. Upstream:
|
| 15 |
+
[iic/speech_fsmn_vad_zh-cn-16k-common-pytorch](https://www.modelscope.cn/models/iic/speech_fsmn_vad_zh-cn-16k-common-pytorch).
|
| 16 |
+
|
| 17 |
+
## Files
|
| 18 |
+
|
| 19 |
+
| File | Precision | Compute unit | Role |
|
| 20 |
+
|------|-----------|--------------|------|
|
| 21 |
+
| `FsmnVadPreprocessor.mlmodelc` | FP32 | CPU | waveform → 400-d features (fbank80 + LFR m=5,n=1 + CMVN) |
|
| 22 |
+
| `FsmnVad.mlmodelc` | FP16 | ANE | FSMN scorer → per-frame softmax posteriors `[1, T, 248]` (exported with a fixed window of T = 3000 frames) |
|
| 23 |
+
| `vad_config.json` | — | — | decision params (`sil_pdf_ids`, thresholds) |
|
| 24 |
+
|
| 25 |
+
## Pipeline
|
| 26 |
+
|
| 27 |
+
```
|
| 28 |
+
waveform → [Preprocessor fp32/CPU] → features [1,T,400]
|
| 29 |
+
→ [FSMN fp16/ANE] → scores [1,T,248]
|
| 30 |
+
→ host: silence_prob = scores[..., sil_pdf_ids].sum(-1)   (scores are already softmax outputs — do not re-apply softmax; sil_pdf_ids=[0])
|
| 31 |
+
→ state machine (thresholds in vad_config) → speech segments [start_ms, end_ms]
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
- Frame shift: 10 ms, i.e. 100 frames/s (LFR n=1, no downsampling).
|
| 35 |
+
- The segment **decision logic** (FunASR `FsmnVADStreaming`) runs on the host:
|
| 36 |
+
silence/speech hysteresis with `max_end_silence_time` (800 ms),
|
| 37 |
+
`max_start_silence_time` (3000 ms), `max_single_segment_time` (60 s),
|
| 38 |
+
`sil_to_speech_time_thres` (150 ms). See `vad_config.json`.
|
| 39 |
+
|
| 40 |
+
Parity: preprocessor matches FunASR `WavFrontendOnline` to max|Δ|≈3e-5; FSMN scorer
|
| 41 |
+
torch↔CoreML max|Δ| 0.0016.
|
| 42 |
+
|
| 43 |
+
## License
|
| 44 |
+
|
| 45 |
+
Weights derive from FunASR's FSMN-VAD; upstream license applies. Format conversion only.
|
vad_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sil_pdf_ids": [0], "speech_2_noise_ratio": 1.0, "fe_prior_thres": 0.0001, "sil_to_speech_time_thres": 150, "max_end_silence_time": 800, "max_start_silence_time": 3000, "max_single_segment_time": 60000, "do_start_point_detection": true, "do_end_point_detection": true, "frame_in_ms": 10, "frame_length_ms": 25, "sample_rate": 16000, "decibel_thres": -100.0, "snr_thres": -100.0}
|