bweng committed on
Commit
271382d
·
verified ·
1 Parent(s): 6d3f7a2

Upload 10 files

Browse files
silero-vad-unified-256ms-v6.0.0.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:718705a52f6031e9a10814115fdfa694d60ddf349a3867915620e67e37f5a9ba
3
+ size 243
silero-vad-unified-256ms-v6.0.0.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28efb7187c42b378ea90183a363742c5c0123ae17dd08de8db2e369f40280602
3
+ size 625
silero-vad-unified-256ms-v6.0.0.mlmodelc/metadata.json ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "shortDescription" : "Silero VAD Unified Model 256ms (STFT + Encoder + Decoder) with noisy-OR aggregation",
4
+ "metadataOutputVersion" : "3.0",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 1 × 1)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 1]",
13
+ "name" : "vad_output",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float32",
20
+ "formattedType" : "MultiArray (Float32 1 × 128)",
21
+ "shortDescription" : "",
22
+ "shape" : "[1, 128]",
23
+ "name" : "new_hidden_state",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float32",
30
+ "formattedType" : "MultiArray (Float32 1 × 128)",
31
+ "shortDescription" : "",
32
+ "shape" : "[1, 128]",
33
+ "name" : "new_cell_state",
34
+ "type" : "MultiArray"
35
+ }
36
+ ],
37
+ "version" : "6.0.0",
38
+ "modelParameters" : [
39
+
40
+ ],
41
+ "author" : "Fluid Inference + Silero Team",
42
+ "specificationVersion" : 6,
43
+ "storagePrecision" : "Mixed (Float16, Float32)",
44
+ "mlProgramOperationTypeHistogram" : {
45
+ "Log" : 8,
46
+ "Concat" : 9,
47
+ "Lstm" : 8,
48
+ "SliceByIndex" : 41,
49
+ "Clip" : 32,
50
+ "Pow" : 16,
51
+ "Transpose" : 16,
52
+ "Sub" : 2,
53
+ "Relu" : 32,
54
+ "Squeeze" : 18,
55
+ "Cast" : 54,
56
+ "Sigmoid" : 8,
57
+ "Add" : 24,
58
+ "Sqrt" : 8,
59
+ "ExpandDims" : 26,
60
+ "ReduceMean" : 8,
61
+ "Conv" : 48,
62
+ "Mul" : 7
63
+ },
64
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
65
+ "stateSchema" : [
66
+
67
+ ],
68
+ "isUpdatable" : "0",
69
+ "availability" : {
70
+ "macOS" : "12.0",
71
+ "tvOS" : "15.0",
72
+ "visionOS" : "1.0",
73
+ "watchOS" : "8.0",
74
+ "iOS" : "15.0",
75
+ "macCatalyst" : "15.0"
76
+ },
77
+ "modelType" : {
78
+ "name" : "MLModelType_mlProgram"
79
+ },
80
+ "inputSchema" : [
81
+ {
82
+ "hasShapeFlexibility" : "0",
83
+ "isOptional" : "0",
84
+ "dataType" : "Float32",
85
+ "formattedType" : "MultiArray (Float32 1 × 4160)",
86
+ "shortDescription" : "",
87
+ "shape" : "[1, 4160]",
88
+ "name" : "audio_input",
89
+ "type" : "MultiArray"
90
+ },
91
+ {
92
+ "hasShapeFlexibility" : "0",
93
+ "isOptional" : "0",
94
+ "dataType" : "Float32",
95
+ "formattedType" : "MultiArray (Float32 1 × 128)",
96
+ "shortDescription" : "",
97
+ "shape" : "[1, 128]",
98
+ "name" : "hidden_state",
99
+ "type" : "MultiArray"
100
+ },
101
+ {
102
+ "hasShapeFlexibility" : "0",
103
+ "isOptional" : "0",
104
+ "dataType" : "Float32",
105
+ "formattedType" : "MultiArray (Float32 1 × 128)",
106
+ "shortDescription" : "",
107
+ "shape" : "[1, 128]",
108
+ "name" : "cell_state",
109
+ "type" : "MultiArray"
110
+ }
111
+ ],
112
+ "userDefinedMetadata" : {
113
+ "com.github.apple.coremltools.conversion_date" : "2025-09-15",
114
+ "com.github.apple.coremltools.source" : "torch==2.7.0",
115
+ "com.github.apple.coremltools.version" : "9.0b1",
116
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
117
+ },
118
+ "generatedClassName" : "silero_vad_unified_256ms_v6_0_0",
119
+ "method" : "predict"
120
+ }
121
+ ]
silero-vad-unified-256ms-v6.0.0.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
silero-vad-unified-256ms-v6.0.0.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:853cf34740d3f5061f977ebe2976f7c921b064261c9c4753b3a1196f2dba42b4
3
+ size 882304
silero-vad-unified-v6.0.0.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d622bd31042ab8fdd009a40a45c2cd8c9611927841bdd7bfc8ad40d16b6e3f7e
3
+ size 243
silero-vad-unified-v6.0.0.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43933b65aab49a674b94a61e6af794f1fae327961a02160a0823dbfb174e91ce
3
+ size 593
silero-vad-unified-v6.0.0.mlmodelc/metadata.json ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "shortDescription" : "Silero VAD Unified Model (STFT + Encoder + Decoder)",
4
+ "metadataOutputVersion" : "3.0",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 1 × 1)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 1]",
13
+ "name" : "vad_output",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float32",
20
+ "formattedType" : "MultiArray (Float32 1 × 128)",
21
+ "shortDescription" : "",
22
+ "shape" : "[1, 128]",
23
+ "name" : "new_hidden_state",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float32",
30
+ "formattedType" : "MultiArray (Float32 1 × 128)",
31
+ "shortDescription" : "",
32
+ "shape" : "[1, 128]",
33
+ "name" : "new_cell_state",
34
+ "type" : "MultiArray"
35
+ }
36
+ ],
37
+ "version" : "6.0.0",
38
+ "modelParameters" : [
39
+
40
+ ],
41
+ "author" : "Fluid Inference + Silero Team",
42
+ "specificationVersion" : 6,
43
+ "storagePrecision" : "Mixed (Float16, Float32)",
44
+ "mlProgramOperationTypeHistogram" : {
45
+ "Log" : 1,
46
+ "Lstm" : 1,
47
+ "SliceByIndex" : 2,
48
+ "Clip" : 4,
49
+ "Transpose" : 2,
50
+ "Pow" : 2,
51
+ "Relu" : 4,
52
+ "Squeeze" : 4,
53
+ "Cast" : 12,
54
+ "Sigmoid" : 1,
55
+ "Add" : 3,
56
+ "Sqrt" : 1,
57
+ "ExpandDims" : 5,
58
+ "ReduceMean" : 1,
59
+ "Conv" : 6
60
+ },
61
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
62
+ "stateSchema" : [
63
+
64
+ ],
65
+ "isUpdatable" : "0",
66
+ "availability" : {
67
+ "macOS" : "12.0",
68
+ "tvOS" : "15.0",
69
+ "visionOS" : "1.0",
70
+ "watchOS" : "8.0",
71
+ "iOS" : "15.0",
72
+ "macCatalyst" : "15.0"
73
+ },
74
+ "modelType" : {
75
+ "name" : "MLModelType_mlProgram"
76
+ },
77
+ "inputSchema" : [
78
+ {
79
+ "hasShapeFlexibility" : "0",
80
+ "isOptional" : "0",
81
+ "dataType" : "Float32",
82
+ "formattedType" : "MultiArray (Float32 1 × 576)",
83
+ "shortDescription" : "",
84
+ "shape" : "[1, 576]",
85
+ "name" : "audio_input",
86
+ "type" : "MultiArray"
87
+ },
88
+ {
89
+ "hasShapeFlexibility" : "0",
90
+ "isOptional" : "0",
91
+ "dataType" : "Float32",
92
+ "formattedType" : "MultiArray (Float32 1 × 128)",
93
+ "shortDescription" : "",
94
+ "shape" : "[1, 128]",
95
+ "name" : "hidden_state",
96
+ "type" : "MultiArray"
97
+ },
98
+ {
99
+ "hasShapeFlexibility" : "0",
100
+ "isOptional" : "0",
101
+ "dataType" : "Float32",
102
+ "formattedType" : "MultiArray (Float32 1 × 128)",
103
+ "shortDescription" : "",
104
+ "shape" : "[1, 128]",
105
+ "name" : "cell_state",
106
+ "type" : "MultiArray"
107
+ }
108
+ ],
109
+ "userDefinedMetadata" : {
110
+ "com.github.apple.coremltools.conversion_date" : "2025-09-15",
111
+ "com.github.apple.coremltools.source" : "torch==2.7.0",
112
+ "com.github.apple.coremltools.version" : "9.0b1",
113
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
114
+ },
115
+ "generatedClassName" : "silero_vad_unified_v6_0_0",
116
+ "method" : "predict"
117
+ }
118
+ ]
silero-vad-unified-v6.0.0.mlmodelc/model.mil ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.0)
2
+ [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.7.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0b1"}})]
3
+ {
4
+ func main<ios15>(tensor<fp32, [1, 576]> audio_input, tensor<fp32, [1, 128]> cell_state, tensor<fp32, [1, 128]> hidden_state) {
5
+ tensor<int32, [1]> x_1_axes_0 = const()[name = tensor<string, []>("x_1_axes_0"), val = tensor<int32, [1]>([1])];
6
+ tensor<string, []> audio_input_to_fp16_dtype_0 = const()[name = tensor<string, []>("audio_input_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
7
+ tensor<fp16, [1, 576]> audio_input_to_fp16 = cast(dtype = audio_input_to_fp16_dtype_0, x = audio_input)[name = tensor<string, []>("cast_11")];
8
+ tensor<fp16, [1, 1, 576]> x_1_cast_fp16 = expand_dims(axes = x_1_axes_0, x = audio_input_to_fp16)[name = tensor<string, []>("x_1_cast_fp16")];
9
+ tensor<string, []> stft_out_pad_type_0 = const()[name = tensor<string, []>("stft_out_pad_type_0"), val = tensor<string, []>("custom")];
10
+ tensor<int32, [2]> stft_out_pad_0 = const()[name = tensor<string, []>("stft_out_pad_0"), val = tensor<int32, [2]>([128, 128])];
11
+ tensor<int32, [1]> stft_out_strides_0 = const()[name = tensor<string, []>("stft_out_strides_0"), val = tensor<int32, [1]>([256])];
12
+ tensor<int32, [1]> stft_out_dilations_0 = const()[name = tensor<string, []>("stft_out_dilations_0"), val = tensor<int32, [1]>([1])];
13
+ tensor<int32, []> stft_out_groups_0 = const()[name = tensor<string, []>("stft_out_groups_0"), val = tensor<int32, []>(1)];
14
+ tensor<fp16, [258, 1, 256]> stft_forward_basis_to_fp16 = const()[name = tensor<string, []>("stft_forward_basis_to_fp16"), val = tensor<fp16, [258, 1, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
15
+ tensor<fp16, [1, 258, 3]> stft_out_cast_fp16 = conv(dilations = stft_out_dilations_0, groups = stft_out_groups_0, pad = stft_out_pad_0, pad_type = stft_out_pad_type_0, strides = stft_out_strides_0, weight = stft_forward_basis_to_fp16, x = x_1_cast_fp16)[name = tensor<string, []>("stft_out_cast_fp16")];
16
+ tensor<int32, [3]> var_25_begin_0 = const()[name = tensor<string, []>("op_25_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
17
+ tensor<int32, [3]> var_25_end_0 = const()[name = tensor<string, []>("op_25_end_0"), val = tensor<int32, [3]>([1, 129, 3])];
18
+ tensor<bool, [3]> var_25_end_mask_0 = const()[name = tensor<string, []>("op_25_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
19
+ tensor<fp16, [1, 129, 3]> var_25_cast_fp16 = slice_by_index(begin = var_25_begin_0, end = var_25_end_0, end_mask = var_25_end_mask_0, x = stft_out_cast_fp16)[name = tensor<string, []>("op_25_cast_fp16")];
20
+ tensor<int32, [3]> var_28_begin_0 = const()[name = tensor<string, []>("op_28_begin_0"), val = tensor<int32, [3]>([0, 129, 0])];
21
+ tensor<int32, [3]> var_28_end_0 = const()[name = tensor<string, []>("op_28_end_0"), val = tensor<int32, [3]>([1, 258, 3])];
22
+ tensor<bool, [3]> var_28_end_mask_0 = const()[name = tensor<string, []>("op_28_end_mask_0"), val = tensor<bool, [3]>([true, true, true])];
23
+ tensor<fp16, [1, 129, 3]> var_28_cast_fp16 = slice_by_index(begin = var_28_begin_0, end = var_28_end_0, end_mask = var_28_end_mask_0, x = stft_out_cast_fp16)[name = tensor<string, []>("op_28_cast_fp16")];
24
+ tensor<fp16, []> var_7_promoted_to_fp16 = const()[name = tensor<string, []>("op_7_promoted_to_fp16"), val = tensor<fp16, []>(0x1p+1)];
25
+ tensor<fp16, [1, 129, 3]> var_30_cast_fp16 = pow(x = var_25_cast_fp16, y = var_7_promoted_to_fp16)[name = tensor<string, []>("op_30_cast_fp16")];
26
+ tensor<fp16, []> var_7_promoted_1_to_fp16 = const()[name = tensor<string, []>("op_7_promoted_1_to_fp16"), val = tensor<fp16, []>(0x1p+1)];
27
+ tensor<fp16, [1, 129, 3]> var_31_cast_fp16 = pow(x = var_28_cast_fp16, y = var_7_promoted_1_to_fp16)[name = tensor<string, []>("op_31_cast_fp16")];
28
+ tensor<fp16, [1, 129, 3]> var_32_cast_fp16 = add(x = var_30_cast_fp16, y = var_31_cast_fp16)[name = tensor<string, []>("op_32_cast_fp16")];
29
+ tensor<fp16, []> var_33_to_fp16 = const()[name = tensor<string, []>("op_33_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
30
+ tensor<fp16, [1, 129, 3]> var_34_cast_fp16 = add(x = var_32_cast_fp16, y = var_33_to_fp16)[name = tensor<string, []>("op_34_cast_fp16")];
31
+ tensor<fp16, [1, 129, 3]> magnitude_cast_fp16 = sqrt(x = var_34_cast_fp16)[name = tensor<string, []>("magnitude_cast_fp16")];
32
+ tensor<fp16, []> var_36_to_fp16 = const()[name = tensor<string, []>("op_36_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
33
+ tensor<fp16, [1, 129, 3]> var_37_cast_fp16 = add(x = magnitude_cast_fp16, y = var_36_to_fp16)[name = tensor<string, []>("op_37_cast_fp16")];
34
+ tensor<fp16, []> input_1_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_1_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
35
+ tensor<fp16, [1, 129, 3]> input_1_cast_fp16 = log(epsilon = input_1_epsilon_0_to_fp16, x = var_37_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
36
+ tensor<string, []> input_3_pad_type_0 = const()[name = tensor<string, []>("input_3_pad_type_0"), val = tensor<string, []>("custom")];
37
+ tensor<int32, [2]> input_3_pad_0 = const()[name = tensor<string, []>("input_3_pad_0"), val = tensor<int32, [2]>([1, 1])];
38
+ tensor<int32, [1]> input_3_strides_0 = const()[name = tensor<string, []>("input_3_strides_0"), val = tensor<int32, [1]>([1])];
39
+ tensor<int32, [1]> input_3_dilations_0 = const()[name = tensor<string, []>("input_3_dilations_0"), val = tensor<int32, [1]>([1])];
40
+ tensor<int32, []> input_3_groups_0 = const()[name = tensor<string, []>("input_3_groups_0"), val = tensor<int32, []>(1)];
41
+ tensor<fp16, [128, 129, 3]> encoder_layers_0_weight_to_fp16 = const()[name = tensor<string, []>("encoder_layers_0_weight_to_fp16"), val = tensor<fp16, [128, 129, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132224)))];
42
+ tensor<fp16, [128]> encoder_layers_0_bias_to_fp16 = const()[name = tensor<string, []>("encoder_layers_0_bias_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231360)))];
43
+ tensor<fp16, [1, 128, 3]> input_3_cast_fp16 = conv(bias = encoder_layers_0_bias_to_fp16, dilations = input_3_dilations_0, groups = input_3_groups_0, pad = input_3_pad_0, pad_type = input_3_pad_type_0, strides = input_3_strides_0, weight = encoder_layers_0_weight_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
44
+ tensor<fp16, [1, 128, 3]> x_3_cast_fp16 = relu(x = input_3_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
45
+ tensor<fp16, []> const_0_to_fp16 = const()[name = tensor<string, []>("const_0_to_fp16"), val = tensor<fp16, []>(-inf)];
46
+ tensor<fp16, []> var_39_to_fp16 = const()[name = tensor<string, []>("op_39_to_fp16"), val = tensor<fp16, []>(0x1.388p+13)];
47
+ tensor<fp16, [1, 128, 3]> clip_0_cast_fp16 = clip(alpha = const_0_to_fp16, beta = var_39_to_fp16, x = x_3_cast_fp16)[name = tensor<string, []>("clip_0_cast_fp16")];
48
+ tensor<string, []> input_7_pad_type_0 = const()[name = tensor<string, []>("input_7_pad_type_0"), val = tensor<string, []>("custom")];
49
+ tensor<int32, [2]> input_7_pad_0 = const()[name = tensor<string, []>("input_7_pad_0"), val = tensor<int32, [2]>([1, 1])];
50
+ tensor<int32, [1]> input_7_strides_0 = const()[name = tensor<string, []>("input_7_strides_0"), val = tensor<int32, [1]>([1])];
51
+ tensor<int32, [1]> input_7_dilations_0 = const()[name = tensor<string, []>("input_7_dilations_0"), val = tensor<int32, [1]>([1])];
52
+ tensor<int32, []> input_7_groups_0 = const()[name = tensor<string, []>("input_7_groups_0"), val = tensor<int32, []>(1)];
53
+ tensor<fp16, [64, 128, 3]> encoder_layers_2_weight_to_fp16 = const()[name = tensor<string, []>("encoder_layers_2_weight_to_fp16"), val = tensor<fp16, [64, 128, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231680)))];
54
+ tensor<fp16, [64]> encoder_layers_2_bias_to_fp16 = const()[name = tensor<string, []>("encoder_layers_2_bias_to_fp16"), val = tensor<fp16, [64]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(280896)))];
55
+ tensor<fp16, [1, 64, 3]> input_7_cast_fp16 = conv(bias = encoder_layers_2_bias_to_fp16, dilations = input_7_dilations_0, groups = input_7_groups_0, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = input_7_strides_0, weight = encoder_layers_2_weight_to_fp16, x = clip_0_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
56
+ tensor<fp16, [1, 64, 3]> x_5_cast_fp16 = relu(x = input_7_cast_fp16)[name = tensor<string, []>("x_5_cast_fp16")];
57
+ tensor<fp16, []> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, []>(-inf)];
58
+ tensor<fp16, [1, 64, 3]> clip_1_cast_fp16 = clip(alpha = const_1_to_fp16, beta = var_39_to_fp16, x = x_5_cast_fp16)[name = tensor<string, []>("clip_1_cast_fp16")];
59
+ tensor<string, []> input_11_pad_type_0 = const()[name = tensor<string, []>("input_11_pad_type_0"), val = tensor<string, []>("custom")];
60
+ tensor<int32, [2]> input_11_pad_0 = const()[name = tensor<string, []>("input_11_pad_0"), val = tensor<int32, [2]>([1, 1])];
61
+ tensor<int32, [1]> input_11_strides_0 = const()[name = tensor<string, []>("input_11_strides_0"), val = tensor<int32, [1]>([1])];
62
+ tensor<int32, [1]> input_11_dilations_0 = const()[name = tensor<string, []>("input_11_dilations_0"), val = tensor<int32, [1]>([1])];
63
+ tensor<int32, []> input_11_groups_0 = const()[name = tensor<string, []>("input_11_groups_0"), val = tensor<int32, []>(1)];
64
+ tensor<fp16, [64, 64, 3]> encoder_layers_4_weight_to_fp16 = const()[name = tensor<string, []>("encoder_layers_4_weight_to_fp16"), val = tensor<fp16, [64, 64, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(281088)))];
65
+ tensor<fp16, [64]> encoder_layers_4_bias_to_fp16 = const()[name = tensor<string, []>("encoder_layers_4_bias_to_fp16"), val = tensor<fp16, [64]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(305728)))];
66
+ tensor<fp16, [1, 64, 3]> input_11_cast_fp16 = conv(bias = encoder_layers_4_bias_to_fp16, dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = encoder_layers_4_weight_to_fp16, x = clip_1_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
67
+ tensor<fp16, [1, 64, 3]> x_7_cast_fp16 = relu(x = input_11_cast_fp16)[name = tensor<string, []>("x_7_cast_fp16")];
68
+ tensor<fp16, []> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, []>(-inf)];
69
+ tensor<fp16, [1, 64, 3]> clip_2_cast_fp16 = clip(alpha = const_2_to_fp16, beta = var_39_to_fp16, x = x_7_cast_fp16)[name = tensor<string, []>("clip_2_cast_fp16")];
70
+ tensor<string, []> input_15_pad_type_0 = const()[name = tensor<string, []>("input_15_pad_type_0"), val = tensor<string, []>("custom")];
71
+ tensor<int32, [2]> input_15_pad_0 = const()[name = tensor<string, []>("input_15_pad_0"), val = tensor<int32, [2]>([1, 1])];
72
+ tensor<int32, [1]> input_15_strides_0 = const()[name = tensor<string, []>("input_15_strides_0"), val = tensor<int32, [1]>([1])];
73
+ tensor<int32, [1]> input_15_dilations_0 = const()[name = tensor<string, []>("input_15_dilations_0"), val = tensor<int32, [1]>([1])];
74
+ tensor<int32, []> input_15_groups_0 = const()[name = tensor<string, []>("input_15_groups_0"), val = tensor<int32, []>(1)];
75
+ tensor<fp16, [128, 64, 3]> encoder_layers_6_weight_to_fp16 = const()[name = tensor<string, []>("encoder_layers_6_weight_to_fp16"), val = tensor<fp16, [128, 64, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(305920)))];
76
+ tensor<fp16, [128]> encoder_layers_6_bias_to_fp16 = const()[name = tensor<string, []>("encoder_layers_6_bias_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(355136)))];
77
+ tensor<fp16, [1, 128, 3]> input_15_cast_fp16 = conv(bias = encoder_layers_6_bias_to_fp16, dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = encoder_layers_6_weight_to_fp16, x = clip_2_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
78
+ tensor<fp16, [1, 128, 3]> x_9_cast_fp16 = relu(x = input_15_cast_fp16)[name = tensor<string, []>("x_9_cast_fp16")];
79
+ tensor<fp16, []> const_3_to_fp16 = const()[name = tensor<string, []>("const_3_to_fp16"), val = tensor<fp16, []>(-inf)];
80
+ tensor<fp16, [1, 128, 3]> clip_3_cast_fp16 = clip(alpha = const_3_to_fp16, beta = var_39_to_fp16, x = x_9_cast_fp16)[name = tensor<string, []>("clip_3_cast_fp16")];
81
+ tensor<int32, [3]> transpose_0_perm_0 = const()[name = tensor<string, []>("transpose_0_perm_0"), val = tensor<int32, [3]>([2, 0, 1])];
82
+ tensor<string, []> transpose_0_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("transpose_0_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
83
+ tensor<int32, [1]> hx_1_axes_0 = const()[name = tensor<string, []>("hx_1_axes_0"), val = tensor<int32, [1]>([0])];
84
+ tensor<string, []> hidden_state_to_fp16_dtype_0 = const()[name = tensor<string, []>("hidden_state_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
85
+ tensor<fp16, [1, 128]> hidden_state_to_fp16 = cast(dtype = hidden_state_to_fp16_dtype_0, x = hidden_state)[name = tensor<string, []>("cast_9")];
86
+ tensor<fp16, [1, 1, 128]> hx_1_cast_fp16 = expand_dims(axes = hx_1_axes_0, x = hidden_state_to_fp16)[name = tensor<string, []>("hx_1_cast_fp16")];
87
+ tensor<int32, [1]> hx_axes_0 = const()[name = tensor<string, []>("hx_axes_0"), val = tensor<int32, [1]>([0])];
88
+ tensor<string, []> cell_state_to_fp16_dtype_0 = const()[name = tensor<string, []>("cell_state_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
89
+ tensor<fp16, [1, 128]> cell_state_to_fp16 = cast(dtype = cell_state_to_fp16_dtype_0, x = cell_state)[name = tensor<string, []>("cast_8")];
90
+ tensor<fp16, [1, 1, 128]> hx_cast_fp16 = expand_dims(axes = hx_axes_0, x = cell_state_to_fp16)[name = tensor<string, []>("hx_cast_fp16")];
91
+ tensor<fp32, [512]> concat_0 = const()[name = tensor<string, []>("concat_0"), val = tensor<fp32, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(355456)))];
92
+ tensor<fp32, [512, 128]> concat_1 = const()[name = tensor<string, []>("concat_1"), val = tensor<fp32, [512, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(357568)))];
93
+ tensor<fp32, [512, 128]> concat_2 = const()[name = tensor<string, []>("concat_2"), val = tensor<fp32, [512, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(619776)))];
94
+ tensor<int32, [1]> lstm_out_batch_first_lstm_h0_squeeze_axes_0 = const()[name = tensor<string, []>("lstm_out_batch_first_lstm_h0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
95
+ tensor<fp16, [1, 128]> lstm_out_batch_first_lstm_h0_squeeze_cast_fp16 = squeeze(axes = lstm_out_batch_first_lstm_h0_squeeze_axes_0, x = hx_1_cast_fp16)[name = tensor<string, []>("lstm_out_batch_first_lstm_h0_squeeze_cast_fp16")];
96
+ tensor<string, []> lstm_out_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("lstm_out_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
97
+ tensor<int32, [1]> lstm_out_batch_first_lstm_c0_squeeze_axes_0 = const()[name = tensor<string, []>("lstm_out_batch_first_lstm_c0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
98
+ tensor<fp16, [1, 128]> lstm_out_batch_first_lstm_c0_squeeze_cast_fp16 = squeeze(axes = lstm_out_batch_first_lstm_c0_squeeze_axes_0, x = hx_cast_fp16)[name = tensor<string, []>("lstm_out_batch_first_lstm_c0_squeeze_cast_fp16")];
99
+ tensor<string, []> lstm_out_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("lstm_out_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
100
+ tensor<string, []> lstm_out_batch_first_direction_0 = const()[name = tensor<string, []>("lstm_out_batch_first_direction_0"), val = tensor<string, []>("forward")];
101
+ tensor<bool, []> lstm_out_batch_first_output_sequence_0 = const()[name = tensor<string, []>("lstm_out_batch_first_output_sequence_0"), val = tensor<bool, []>(true)];
102
+ tensor<string, []> lstm_out_batch_first_recurrent_activation_0 = const()[name = tensor<string, []>("lstm_out_batch_first_recurrent_activation_0"), val = tensor<string, []>("sigmoid")];
103
+ tensor<string, []> lstm_out_batch_first_cell_activation_0 = const()[name = tensor<string, []>("lstm_out_batch_first_cell_activation_0"), val = tensor<string, []>("tanh")];
104
+ tensor<string, []> lstm_out_batch_first_activation_0 = const()[name = tensor<string, []>("lstm_out_batch_first_activation_0"), val = tensor<string, []>("tanh")];
105
+ tensor<fp32, [1, 128]> lstm_out_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_batch_first_lstm_c0_squeeze_cast_fp16)[name = tensor<string, []>("cast_6")];
106
+ tensor<fp32, [1, 128]> lstm_out_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_batch_first_lstm_h0_squeeze_cast_fp16)[name = tensor<string, []>("cast_7")];
107
+ tensor<fp16, [3, 1, 128]> transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = clip_3_cast_fp16)[name = tensor<string, []>("transpose_3")];
108
+ tensor<fp32, [3, 1, 128]> transpose_0_cast_fp16_to_fp32 = cast(dtype = transpose_0_cast_fp16_to_fp32_dtype_0, x = transpose_0_cast_fp16)[name = tensor<string, []>("cast_10")];
109
+ tensor<fp32, [3, 1, 128]> lstm_out_batch_first_0, tensor<fp32, [1, 128]> lstm_out_batch_first_1, tensor<fp32, [1, 128]> lstm_out_batch_first_2 = lstm(activation = lstm_out_batch_first_activation_0, bias = concat_0, cell_activation = lstm_out_batch_first_cell_activation_0, direction = lstm_out_batch_first_direction_0, initial_c = lstm_out_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32, initial_h = lstm_out_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32, output_sequence = lstm_out_batch_first_output_sequence_0, recurrent_activation = lstm_out_batch_first_recurrent_activation_0, weight_hh = concat_2, weight_ih = concat_1, x = transpose_0_cast_fp16_to_fp32)[name = tensor<string, []>("lstm_out_batch_first")];
110
+ tensor<int32, [3]> transpose_1_perm_0 = const()[name = tensor<string, []>("transpose_1_perm_0"), val = tensor<int32, [3]>([1, 2, 0])];
111
+ tensor<string, []> lstm_out_batch_first_0_to_fp16_dtype_0 = const()[name = tensor<string, []>("lstm_out_batch_first_0_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
112
+ tensor<int32, [1]> hn_axes_0 = const()[name = tensor<string, []>("hn_axes_0"), val = tensor<int32, [1]>([0])];
113
+ tensor<string, []> lstm_out_batch_first_1_to_fp16_dtype_0 = const()[name = tensor<string, []>("lstm_out_batch_first_1_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
114
+ tensor<fp16, [1, 128]> lstm_out_batch_first_1_to_fp16 = cast(dtype = lstm_out_batch_first_1_to_fp16_dtype_0, x = lstm_out_batch_first_1)[name = tensor<string, []>("cast_4")];
115
+ tensor<fp16, [1, 1, 128]> hn_cast_fp16 = expand_dims(axes = hn_axes_0, x = lstm_out_batch_first_1_to_fp16)[name = tensor<string, []>("hn_cast_fp16")];
116
+ tensor<int32, [1]> cn_axes_0 = const()[name = tensor<string, []>("cn_axes_0"), val = tensor<int32, [1]>([0])];
117
+ tensor<string, []> lstm_out_batch_first_2_to_fp16_dtype_0 = const()[name = tensor<string, []>("lstm_out_batch_first_2_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
118
+ tensor<fp16, [1, 128]> lstm_out_batch_first_2_to_fp16 = cast(dtype = lstm_out_batch_first_2_to_fp16_dtype_0, x = lstm_out_batch_first_2)[name = tensor<string, []>("cast_3")];
119
+ tensor<fp16, [1, 1, 128]> cn_cast_fp16 = expand_dims(axes = cn_axes_0, x = lstm_out_batch_first_2_to_fp16)[name = tensor<string, []>("cn_cast_fp16")];
120
+ tensor<string, []> input_pad_type_0 = const()[name = tensor<string, []>("input_pad_type_0"), val = tensor<string, []>("valid")];
121
+ tensor<int32, [1]> input_strides_0 = const()[name = tensor<string, []>("input_strides_0"), val = tensor<int32, [1]>([1])];
122
+ tensor<int32, [2]> input_pad_0 = const()[name = tensor<string, []>("input_pad_0"), val = tensor<int32, [2]>([0, 0])];
123
+ tensor<int32, [1]> input_dilations_0 = const()[name = tensor<string, []>("input_dilations_0"), val = tensor<int32, [1]>([1])];
124
+ tensor<int32, []> input_groups_0 = const()[name = tensor<string, []>("input_groups_0"), val = tensor<int32, []>(1)];
125
+ tensor<fp16, [1, 128, 1]> decoder_final_conv_weight_to_fp16 = const()[name = tensor<string, []>("decoder_final_conv_weight_to_fp16"), val = tensor<fp16, [1, 128, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(881984)))];
126
+ tensor<fp16, [1]> decoder_final_conv_bias_to_fp16 = const()[name = tensor<string, []>("decoder_final_conv_bias_to_fp16"), val = tensor<fp16, [1]>([0x1.dfp-5])];
127
+ tensor<fp16, [3, 1, 128]> lstm_out_batch_first_0_to_fp16 = cast(dtype = lstm_out_batch_first_0_to_fp16_dtype_0, x = lstm_out_batch_first_0)[name = tensor<string, []>("cast_5")];
128
+ tensor<fp16, [1, 128, 3]> transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = lstm_out_batch_first_0_to_fp16)[name = tensor<string, []>("transpose_2")];
129
+ tensor<fp16, [1, 1, 3]> input_cast_fp16 = conv(bias = decoder_final_conv_bias_to_fp16, dilations = input_dilations_0, groups = input_groups_0, pad = input_pad_0, pad_type = input_pad_type_0, strides = input_strides_0, weight = decoder_final_conv_weight_to_fp16, x = transpose_1_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
130
+ tensor<fp16, [1, 1, 3]> out_cast_fp16 = sigmoid(x = input_cast_fp16)[name = tensor<string, []>("out_cast_fp16")];
131
+ tensor<int32, [1]> var_124_axes_0 = const()[name = tensor<string, []>("op_124_axes_0"), val = tensor<int32, [1]>([2])];
132
+ tensor<bool, []> var_124_keep_dims_0 = const()[name = tensor<string, []>("op_124_keep_dims_0"), val = tensor<bool, []>(true)];
133
+ tensor<fp16, [1, 1, 1]> var_124_cast_fp16 = reduce_mean(axes = var_124_axes_0, keep_dims = var_124_keep_dims_0, x = out_cast_fp16)[name = tensor<string, []>("op_124_cast_fp16")];
134
+ tensor<string, []> var_124_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_124_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
135
+ tensor<int32, [1]> var_125_axes_0 = const()[name = tensor<string, []>("op_125_axes_0"), val = tensor<int32, [1]>([0])];
136
+ tensor<fp16, [1, 128]> var_125_cast_fp16 = squeeze(axes = var_125_axes_0, x = hn_cast_fp16)[name = tensor<string, []>("op_125_cast_fp16")];
137
+ tensor<string, []> var_125_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_125_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
138
+ tensor<int32, [1]> var_126_axes_0 = const()[name = tensor<string, []>("op_126_axes_0"), val = tensor<int32, [1]>([0])];
139
+ tensor<fp16, [1, 128]> var_126_cast_fp16 = squeeze(axes = var_126_axes_0, x = cn_cast_fp16)[name = tensor<string, []>("op_126_cast_fp16")];
140
+ tensor<string, []> var_126_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_126_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
141
+ tensor<fp32, [1, 128]> new_cell_state = cast(dtype = var_126_cast_fp16_to_fp32_dtype_0, x = var_126_cast_fp16)[name = tensor<string, []>("cast_0")];
142
+ tensor<fp32, [1, 128]> new_hidden_state = cast(dtype = var_125_cast_fp16_to_fp32_dtype_0, x = var_125_cast_fp16)[name = tensor<string, []>("cast_1")];
143
+ tensor<fp32, [1, 1, 1]> vad_output = cast(dtype = var_124_cast_fp16_to_fp32_dtype_0, x = var_124_cast_fp16)[name = tensor<string, []>("cast_2")];
144
+ } -> (vad_output, new_hidden_state, new_cell_state);
145
+ }
silero-vad-unified-v6.0.0.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:853cf34740d3f5061f977ebe2976f7c921b064261c9c4753b3a1196f2dba42b4
3
+ size 882304