bweng committed on
Commit
7b400f0
·
verified ·
1 Parent(s): 9396c11

Split FBank and Embedding

Browse files
Embedding.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f5edc9b97ea81d5d526c4190bb546a83b6d3cb3c861a2caf8f296d6d7cc96c1
3
  size 243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab79be8f578f5b54b085cb38190c91e01bf1e71ab9b1189a19fe009f70dbbf52
3
  size 243
Embedding.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3eedfaf32d775ddf6f06ae784b21177d7683ab647509b7ce4857548cbd112a73
3
- size 603
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef4e9c40ab61e1f3c1f04e05133ff2cedb385d75207823c15fdcce82f8a296a1
3
+ size 614
Embedding.mlmodelc/metadata.json CHANGED
@@ -1,15 +1,15 @@
1
  [
2
  {
3
- "shortDescription" : "pyannote community-1 speaker embedding (10 s WeSpeaker ResNet34, interpolates 589-frame weights to pooling layer size internally)",
4
  "metadataOutputVersion" : "3.0",
5
  "outputSchema" : [
6
  {
7
  "hasShapeFlexibility" : "0",
8
  "isOptional" : "0",
9
  "dataType" : "Float32",
10
- "formattedType" : "MultiArray (Float32 1 × 256)",
11
  "shortDescription" : "",
12
- "shape" : "[1, 256]",
13
  "name" : "embedding",
14
  "type" : "MultiArray"
15
  }
@@ -20,33 +20,27 @@
20
  ],
21
  "author" : "Fluid Inference",
22
  "specificationVersion" : 8,
23
- "storagePrecision" : "Float32",
24
  "license" : "CC-BY-4.0",
25
  "mlProgramOperationTypeHistogram" : {
26
- "Ios16.reduceL2Norm" : 1,
27
- "Ios17.reshape" : 1,
28
- "Ios16.reduceMean" : 2,
29
- "Ios17.transpose" : 2,
30
- "Ios17.expandDims" : 8,
31
- "Ios17.add" : 20,
32
- "Ios17.gatherAlongAxis" : 2,
33
- "Ios17.sliceByIndex" : 1,
34
- "Ios16.reduceSum" : 4,
35
- "Ios17.squeeze" : 4,
36
- "Pad" : 2,
37
- "Ios17.log" : 1,
38
  "Ios17.sqrt" : 1,
39
- "Ios17.sub" : 5,
40
  "Ios17.conv" : 37,
41
- "Ios17.clip" : 3,
42
- "Ios16.relu" : 33,
43
- "Ios17.linear" : 4,
44
- "Ios17.pow" : 2,
45
- "Ios17.realDiv" : 4,
46
  "Ios17.concat" : 1,
47
- "Ios17.mul" : 9
 
 
 
 
 
 
 
 
 
 
48
  },
49
- "computePrecision" : "Mixed (Float32, Int32)",
50
  "stateSchema" : [
51
 
52
  ],
@@ -64,28 +58,32 @@
64
  },
65
  "inputSchema" : [
66
  {
67
- "hasShapeFlexibility" : "0",
68
- "isOptional" : "0",
69
  "dataType" : "Float32",
70
- "formattedType" : "MultiArray (Float32 1 × 1 × 160000)",
71
- "shortDescription" : "",
72
- "shape" : "[1, 1, 160000]",
73
- "name" : "audio",
74
- "type" : "MultiArray"
 
 
 
 
75
  },
76
  {
77
- "hasShapeFlexibility" : "0",
78
- "isOptional" : "0",
79
  "dataType" : "Float32",
 
 
 
 
80
  "formattedType" : "MultiArray (Float32 1 × 589)",
81
- "shortDescription" : "",
82
  "shape" : "[1, 589]",
83
  "name" : "weights",
84
- "type" : "MultiArray"
85
  }
86
  ],
87
  "userDefinedMetadata" : {
88
- "com.github.apple.coremltools.conversion_date" : "2025-10-03",
89
  "com.github.apple.coremltools.source" : "torch==2.8.0",
90
  "com.github.apple.coremltools.version" : "9.0b1",
91
  "com.github.apple.coremltools.source_dialect" : "TorchScript"
 
1
  [
2
  {
3
+ "shortDescription" : "pyannote community-1 speaker embedding (10 s WeSpeaker ResNet34, FBANK + segmentation weights input)",
4
  "metadataOutputVersion" : "3.0",
5
  "outputSchema" : [
6
  {
7
  "hasShapeFlexibility" : "0",
8
  "isOptional" : "0",
9
  "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32)",
11
  "shortDescription" : "",
12
+ "shape" : "[]",
13
  "name" : "embedding",
14
  "type" : "MultiArray"
15
  }
 
20
  ],
21
  "author" : "Fluid Inference",
22
  "specificationVersion" : 8,
23
+ "storagePrecision" : "Float16",
24
  "license" : "CC-BY-4.0",
25
  "mlProgramOperationTypeHistogram" : {
26
+ "Ios17.mul" : 4,
 
 
 
 
 
 
 
 
 
 
 
27
  "Ios17.sqrt" : 1,
28
+ "Ios17.sub" : 2,
29
  "Ios17.conv" : 37,
 
 
 
 
 
30
  "Ios17.concat" : 1,
31
+ "Ios17.add" : 18,
32
+ "Ios17.realDiv" : 4,
33
+ "UpsampleNearestNeighbor" : 1,
34
+ "Ios16.relu" : 33,
35
+ "Ios17.clip" : 2,
36
+ "Ios17.expandDims" : 7,
37
+ "Ios16.reduceSum" : 4,
38
+ "Ios16.reduceL2Norm" : 1,
39
+ "Ios17.squeeze" : 2,
40
+ "Ios17.reshape" : 2,
41
+ "Ios17.cast" : 13
42
  },
43
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
44
  "stateSchema" : [
45
 
46
  ],
 
58
  },
59
  "inputSchema" : [
60
  {
 
 
61
  "dataType" : "Float32",
62
+ "hasShapeFlexibility" : "1",
63
+ "isOptional" : "0",
64
+ "shapeFlexibility" : "1...32 × 1 × 80 × 998",
65
+ "shapeRange" : "[[1, 32], [1, 1], [80, 80], [998, 998]]",
66
+ "formattedType" : "MultiArray (Float32 1 × 1 × 80 × 998)",
67
+ "type" : "MultiArray",
68
+ "shape" : "[1, 1, 80, 998]",
69
+ "name" : "fbank",
70
+ "shortDescription" : ""
71
  },
72
  {
 
 
73
  "dataType" : "Float32",
74
+ "hasShapeFlexibility" : "1",
75
+ "isOptional" : "0",
76
+ "shapeFlexibility" : "1...32 × 589",
77
+ "shapeRange" : "[[1, 32], [589, 589]]",
78
  "formattedType" : "MultiArray (Float32 1 × 589)",
79
+ "type" : "MultiArray",
80
  "shape" : "[1, 589]",
81
  "name" : "weights",
82
+ "shortDescription" : ""
83
  }
84
  ],
85
  "userDefinedMetadata" : {
86
+ "com.github.apple.coremltools.conversion_date" : "2025-10-13",
87
  "com.github.apple.coremltools.source" : "torch==2.8.0",
88
  "com.github.apple.coremltools.version" : "9.0b1",
89
  "com.github.apple.coremltools.source_dialect" : "TorchScript"
Embedding.mlmodelc/model.mil CHANGED
The diff for this file is too large to render. See raw diff
 
Embedding.mlmodelc/weights/weight.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a771617a3a551b18874c78ae5f92f6f660a14e84e55c0121e6402725f88d6e2
3
- size 28304640
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dba18a57a81b1e872802ca4def29541bb7900ccff430d9b2040092cadd7d688
3
+ size 13264960
FBank.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a57eb96c61ea3470b3c509eafd8085e3a05d023219096414554700bc31e3f25
3
+ size 243
FBank.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33b0f747fa880e3446f676078fd1b33f7333db5e94cb956b321c24f14b2c12bf
3
+ size 538
FBank.mlmodelc/metadata.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "shortDescription" : "pyannote community-1 FBANK front-end (10 s audio -> 998 frames x 80 bins)",
4
+ "metadataOutputVersion" : "3.0",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32)",
11
+ "shortDescription" : "",
12
+ "shape" : "[]",
13
+ "name" : "fbank",
14
+ "type" : "MultiArray"
15
+ }
16
+ ],
17
+ "version" : "pyannote-speaker-diarization-community-1",
18
+ "modelParameters" : [
19
+
20
+ ],
21
+ "author" : "Fluid Inference",
22
+ "specificationVersion" : 8,
23
+ "storagePrecision" : "Float16",
24
+ "license" : "CC-BY-4.0",
25
+ "mlProgramOperationTypeHistogram" : {
26
+ "Ios17.mul" : 2,
27
+ "Ios17.transpose" : 1,
28
+ "Ios17.sub" : 2,
29
+ "Ios17.conv" : 4,
30
+ "Ios17.log" : 1,
31
+ "Ios17.sliceByIndex" : 1,
32
+ "Ios16.reduceMean" : 1,
33
+ "Ios17.add" : 1,
34
+ "Ios17.clip" : 1,
35
+ "Ios17.pow" : 2,
36
+ "Ios17.expandDims" : 3,
37
+ "Ios17.squeeze" : 4,
38
+ "Ios17.reshape" : 2,
39
+ "Ios17.cast" : 6,
40
+ "Pad" : 2
41
+ },
42
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
43
+ "stateSchema" : [
44
+
45
+ ],
46
+ "isUpdatable" : "0",
47
+ "availability" : {
48
+ "macOS" : "14.0",
49
+ "tvOS" : "17.0",
50
+ "visionOS" : "1.0",
51
+ "watchOS" : "10.0",
52
+ "iOS" : "17.0",
53
+ "macCatalyst" : "17.0"
54
+ },
55
+ "modelType" : {
56
+ "name" : "MLModelType_mlProgram"
57
+ },
58
+ "inputSchema" : [
59
+ {
60
+ "dataType" : "Float32",
61
+ "hasShapeFlexibility" : "1",
62
+ "isOptional" : "0",
63
+ "shapeFlexibility" : "1...32 × 1 × 160000",
64
+ "shapeRange" : "[[1, 32], [1, 1], [160000, 160000]]",
65
+ "formattedType" : "MultiArray (Float32 1 × 1 × 160000)",
66
+ "type" : "MultiArray",
67
+ "shape" : "[1, 1, 160000]",
68
+ "name" : "audio",
69
+ "shortDescription" : ""
70
+ }
71
+ ],
72
+ "userDefinedMetadata" : {
73
+ "com.github.apple.coremltools.conversion_date" : "2025-10-13",
74
+ "com.github.apple.coremltools.source" : "torch==2.8.0",
75
+ "com.github.apple.coremltools.version" : "9.0b1",
76
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
77
+ },
78
+ "generatedClassName" : "fbank_community_1",
79
+ "method" : "predict"
80
+ }
81
+ ]
FBank.mlmodelc/model.mil ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.0)
2
+ [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.8.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0b1"}})]
3
+ {
4
+ func main<ios17>(tensor<fp32, [?, 1, 160000]> audio) [FlexibleShapeInformation = tuple<tuple<tensor<string, []>, dict<tensor<string, []>, tensor<int32, [?]>>>, tuple<tensor<string, []>, dict<tensor<string, []>, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"audio", [1, 1, 160000]}}), ("RangeDims", {{"audio", [[1, 32], [1, 1], [160000, 160000]]}})))] {
5
+ tensor<string, []> audio_to_fp16_dtype_0 = const()[name = tensor<string, []>("audio_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
6
+ tensor<string, []> frames_1_pad_type_0 = const()[name = tensor<string, []>("frames_1_pad_type_0"), val = tensor<string, []>("valid")];
7
+ tensor<int32, [1]> frames_1_strides_0 = const()[name = tensor<string, []>("frames_1_strides_0"), val = tensor<int32, [1]>([160])];
8
+ tensor<int32, [2]> frames_1_pad_0 = const()[name = tensor<string, []>("frames_1_pad_0"), val = tensor<int32, [2]>([0, 0])];
9
+ tensor<int32, [1]> frames_1_dilations_0 = const()[name = tensor<string, []>("frames_1_dilations_0"), val = tensor<int32, [1]>([1])];
10
+ tensor<int32, []> frames_1_groups_0 = const()[name = tensor<string, []>("frames_1_groups_0"), val = tensor<int32, []>(1)];
11
+ tensor<fp16, [400, 1, 400]> frame_kernel_to_fp16 = const()[name = tensor<string, []>("frame_kernel_to_fp16"), val = tensor<fp16, [400, 1, 400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
12
+ tensor<fp16, [?, 1, 160000]> audio_to_fp16 = cast(dtype = audio_to_fp16_dtype_0, x = audio)[name = tensor<string, []>("cast_7")];
13
+ tensor<fp16, [?, 400, 998]> frames_1_cast_fp16 = conv(dilations = frames_1_dilations_0, groups = frames_1_groups_0, pad = frames_1_pad_0, pad_type = frames_1_pad_type_0, strides = frames_1_strides_0, weight = frame_kernel_to_fp16, x = audio_to_fp16)[name = tensor<string, []>("frames_1_cast_fp16")];
14
+ tensor<int32, [3]> frames_3_perm_0 = const()[name = tensor<string, []>("frames_3_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
15
+ tensor<int32, [2]> concat_0x = const()[name = tensor<string, []>("concat_0x"), val = tensor<int32, [2]>([-1, 400])];
16
+ tensor<fp16, [?, 998, 400]> frames_3_cast_fp16 = transpose(perm = frames_3_perm_0, x = frames_1_cast_fp16)[name = tensor<string, []>("transpose_0")];
17
+ tensor<fp16, [?, 400]> frames_5_cast_fp16 = reshape(shape = concat_0x, x = frames_3_cast_fp16)[name = tensor<string, []>("frames_5_cast_fp16")];
18
+ tensor<string, []> frames_5_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("frames_5_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
19
+ tensor<int32, [1]> var_50_axes_0 = const()[name = tensor<string, []>("op_50_axes_0"), val = tensor<int32, [1]>([1])];
20
+ tensor<bool, []> var_50_keep_dims_0 = const()[name = tensor<string, []>("op_50_keep_dims_0"), val = tensor<bool, []>(true)];
21
+ tensor<fp32, [?, 400]> frames_5_cast_fp16_to_fp32 = cast(dtype = frames_5_cast_fp16_to_fp32_dtype_0, x = frames_5_cast_fp16)[name = tensor<string, []>("cast_6")];
22
+ tensor<fp32, [?, 1]> var_50 = reduce_mean(axes = var_50_axes_0, keep_dims = var_50_keep_dims_0, x = frames_5_cast_fp16_to_fp32)[name = tensor<string, []>("op_50")];
23
+ tensor<string, []> var_50_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_50_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
24
+ tensor<fp16, [?, 1]> var_50_to_fp16 = cast(dtype = var_50_to_fp16_dtype_0, x = var_50)[name = tensor<string, []>("cast_5")];
25
+ tensor<fp16, [?, 400]> frames_7_cast_fp16 = sub(x = frames_5_cast_fp16, y = var_50_to_fp16)[name = tensor<string, []>("frames_7_cast_fp16")];
26
+ tensor<int32, [1]> input_1_axes_0 = const()[name = tensor<string, []>("input_1_axes_0"), val = tensor<int32, [1]>([1])];
27
+ tensor<fp16, [?, 1, 400]> input_1_cast_fp16 = expand_dims(axes = input_1_axes_0, x = frames_7_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
28
+ tensor<int32, [6]> var_60_pad_0 = const()[name = tensor<string, []>("op_60_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 1, 0])];
29
+ tensor<string, []> var_60_mode_0 = const()[name = tensor<string, []>("op_60_mode_0"), val = tensor<string, []>("replicate")];
30
+ tensor<fp16, []> const_0_to_fp16 = const()[name = tensor<string, []>("const_0_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
31
+ tensor<fp16, [?, 1, 401]> var_60_cast_fp16 = pad(constant_val = const_0_to_fp16, mode = var_60_mode_0, pad = var_60_pad_0, x = input_1_cast_fp16)[name = tensor<string, []>("op_60_cast_fp16")];
32
+ tensor<int32, [1]> padded_axes_0 = const()[name = tensor<string, []>("padded_axes_0"), val = tensor<int32, [1]>([1])];
33
+ tensor<fp16, [?, 401]> padded_cast_fp16 = squeeze(axes = padded_axes_0, x = var_60_cast_fp16)[name = tensor<string, []>("padded_cast_fp16")];
34
+ tensor<int32, [2]> var_72_begin_0 = const()[name = tensor<string, []>("op_72_begin_0"), val = tensor<int32, [2]>([0, 0])];
35
+ tensor<int32, [2]> var_72_end_0 = const()[name = tensor<string, []>("op_72_end_0"), val = tensor<int32, [2]>([0, 400])];
36
+ tensor<bool, [2]> var_72_end_mask_0 = const()[name = tensor<string, []>("op_72_end_mask_0"), val = tensor<bool, [2]>([true, false])];
37
+ tensor<fp16, [?, 400]> var_72_cast_fp16 = slice_by_index(begin = var_72_begin_0, end = var_72_end_0, end_mask = var_72_end_mask_0, x = padded_cast_fp16)[name = tensor<string, []>("op_72_cast_fp16")];
38
+ tensor<fp16, []> var_73_to_fp16 = const()[name = tensor<string, []>("op_73_to_fp16"), val = tensor<fp16, []>(0x1.f0cp-1)];
39
+ tensor<fp16, [?, 400]> var_74_cast_fp16 = mul(x = var_72_cast_fp16, y = var_73_to_fp16)[name = tensor<string, []>("op_74_cast_fp16")];
40
+ tensor<fp16, [?, 400]> frames_9_cast_fp16 = sub(x = frames_7_cast_fp16, y = var_74_cast_fp16)[name = tensor<string, []>("frames_9_cast_fp16")];
41
+ tensor<fp16, [1, 400]> window_to_fp16 = const()[name = tensor<string, []>("window_to_fp16"), val = tensor<fp16, [1, 400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(320128)))];
42
+ tensor<fp16, [?, 400]> frames_11_cast_fp16 = mul(x = frames_9_cast_fp16, y = window_to_fp16)[name = tensor<string, []>("frames_11_cast_fp16")];
43
+ tensor<int32, [1]> input_axes_0 = const()[name = tensor<string, []>("input_axes_0"), val = tensor<int32, [1]>([1])];
44
+ tensor<fp16, [?, 1, 400]> input_cast_fp16 = expand_dims(axes = input_axes_0, x = frames_11_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
45
+ tensor<int32, [6]> var_85_pad_0 = const()[name = tensor<string, []>("op_85_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 0, 112])];
46
+ tensor<string, []> var_85_mode_0 = const()[name = tensor<string, []>("op_85_mode_0"), val = tensor<string, []>("constant")];
47
+ tensor<fp16, []> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
48
+ tensor<fp16, [?, 1, 512]> var_85_cast_fp16 = pad(constant_val = const_1_to_fp16, mode = var_85_mode_0, pad = var_85_pad_0, x = input_cast_fp16)[name = tensor<string, []>("op_85_cast_fp16")];
49
+ tensor<string, []> var_105_pad_type_0 = const()[name = tensor<string, []>("op_105_pad_type_0"), val = tensor<string, []>("valid")];
50
+ tensor<int32, [1]> var_105_strides_0 = const()[name = tensor<string, []>("op_105_strides_0"), val = tensor<int32, [1]>([1])];
51
+ tensor<int32, [2]> var_105_pad_0 = const()[name = tensor<string, []>("op_105_pad_0"), val = tensor<int32, [2]>([0, 0])];
52
+ tensor<int32, [1]> var_105_dilations_0 = const()[name = tensor<string, []>("op_105_dilations_0"), val = tensor<int32, [1]>([1])];
53
+ tensor<int32, []> var_105_groups_0 = const()[name = tensor<string, []>("op_105_groups_0"), val = tensor<int32, []>(1)];
54
+ tensor<fp16, [257, 1, 512]> dft_real_weight_to_fp16 = const()[name = tensor<string, []>("dft_real_weight_to_fp16"), val = tensor<fp16, [257, 1, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(321024)))];
55
+ tensor<fp16, [?, 257, 1]> var_105_cast_fp16 = conv(dilations = var_105_dilations_0, groups = var_105_groups_0, pad = var_105_pad_0, pad_type = var_105_pad_type_0, strides = var_105_strides_0, weight = dft_real_weight_to_fp16, x = var_85_cast_fp16)[name = tensor<string, []>("op_105_cast_fp16")];
56
+ tensor<int32, [1]> real_axes_0 = const()[name = tensor<string, []>("real_axes_0"), val = tensor<int32, [1]>([-1])];
57
+ tensor<fp16, [?, 257]> real_cast_fp16 = squeeze(axes = real_axes_0, x = var_105_cast_fp16)[name = tensor<string, []>("real_cast_fp16")];
58
+ tensor<string, []> var_123_pad_type_0 = const()[name = tensor<string, []>("op_123_pad_type_0"), val = tensor<string, []>("valid")];
59
+ tensor<int32, [1]> var_123_strides_0 = const()[name = tensor<string, []>("op_123_strides_0"), val = tensor<int32, [1]>([1])];
60
+ tensor<int32, [2]> var_123_pad_0 = const()[name = tensor<string, []>("op_123_pad_0"), val = tensor<int32, [2]>([0, 0])];
61
+ tensor<int32, [1]> var_123_dilations_0 = const()[name = tensor<string, []>("op_123_dilations_0"), val = tensor<int32, [1]>([1])];
62
+ tensor<int32, []> var_123_groups_0 = const()[name = tensor<string, []>("op_123_groups_0"), val = tensor<int32, []>(1)];
63
+ tensor<fp16, [257, 1, 512]> dft_imag_weight_to_fp16 = const()[name = tensor<string, []>("dft_imag_weight_to_fp16"), val = tensor<fp16, [257, 1, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(584256)))];
64
+ tensor<fp16, [?, 257, 1]> var_123_cast_fp16 = conv(dilations = var_123_dilations_0, groups = var_123_groups_0, pad = var_123_pad_0, pad_type = var_123_pad_type_0, strides = var_123_strides_0, weight = dft_imag_weight_to_fp16, x = var_85_cast_fp16)[name = tensor<string, []>("op_123_cast_fp16")];
65
+ tensor<int32, [1]> imag_axes_0 = const()[name = tensor<string, []>("imag_axes_0"), val = tensor<int32, [1]>([-1])];
66
+ tensor<fp16, [?, 257]> imag_cast_fp16 = squeeze(axes = imag_axes_0, x = var_123_cast_fp16)[name = tensor<string, []>("imag_cast_fp16")];
67
+ tensor<fp16, []> var_126_promoted_to_fp16 = const()[name = tensor<string, []>("op_126_promoted_to_fp16"), val = tensor<fp16, []>(0x1p+1)];
68
+ tensor<fp16, [?, 257]> var_127_cast_fp16 = pow(x = real_cast_fp16, y = var_126_promoted_to_fp16)[name = tensor<string, []>("op_127_cast_fp16")];
69
+ tensor<fp16, []> var_128_promoted_to_fp16 = const()[name = tensor<string, []>("op_128_promoted_to_fp16"), val = tensor<fp16, []>(0x1p+1)];
70
+ tensor<fp16, [?, 257]> var_129_cast_fp16 = pow(x = imag_cast_fp16, y = var_128_promoted_to_fp16)[name = tensor<string, []>("op_129_cast_fp16")];
71
+ tensor<fp16, [?, 257]> power_cast_fp16 = add(x = var_127_cast_fp16, y = var_129_cast_fp16)[name = tensor<string, []>("power_cast_fp16")];
72
+ tensor<int32, [1]> var_133_axes_0 = const()[name = tensor<string, []>("op_133_axes_0"), val = tensor<int32, [1]>([-1])];
73
+ tensor<fp16, [?, 257, 1]> var_133_cast_fp16 = expand_dims(axes = var_133_axes_0, x = power_cast_fp16)[name = tensor<string, []>("op_133_cast_fp16")];
74
+ tensor<string, []> var_149_pad_type_0 = const()[name = tensor<string, []>("op_149_pad_type_0"), val = tensor<string, []>("valid")];
75
+ tensor<int32, [1]> var_149_strides_0 = const()[name = tensor<string, []>("op_149_strides_0"), val = tensor<int32, [1]>([1])];
76
+ tensor<int32, [2]> var_149_pad_0 = const()[name = tensor<string, []>("op_149_pad_0"), val = tensor<int32, [2]>([0, 0])];
77
+ tensor<int32, [1]> var_149_dilations_0 = const()[name = tensor<string, []>("op_149_dilations_0"), val = tensor<int32, [1]>([1])];
78
+ tensor<int32, []> var_149_groups_0 = const()[name = tensor<string, []>("op_149_groups_0"), val = tensor<int32, []>(1)];
79
+ tensor<fp16, [80, 257, 1]> mel_weight_to_fp16 = const()[name = tensor<string, []>("mel_weight_to_fp16"), val = tensor<fp16, [80, 257, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(847488)))];
80
+ tensor<fp16, [?, 80, 1]> var_149_cast_fp16 = conv(dilations = var_149_dilations_0, groups = var_149_groups_0, pad = var_149_pad_0, pad_type = var_149_pad_type_0, strides = var_149_strides_0, weight = mel_weight_to_fp16, x = var_133_cast_fp16)[name = tensor<string, []>("op_149_cast_fp16")];
81
+ tensor<int32, [1]> mel_1_axes_0 = const()[name = tensor<string, []>("mel_1_axes_0"), val = tensor<int32, [1]>([-1])];
82
+ tensor<fp16, [?, 80]> mel_1_cast_fp16 = squeeze(axes = mel_1_axes_0, x = var_149_cast_fp16)[name = tensor<string, []>("mel_1_cast_fp16")];
83
+ tensor<fp16, []> eps_to_fp16 = const()[name = tensor<string, []>("eps_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
84
+ tensor<fp16, []> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, []>(inf)];
85
+ tensor<fp16, [?, 80]> clip_0_cast_fp16 = clip(alpha = eps_to_fp16, beta = const_2_to_fp16, x = mel_1_cast_fp16)[name = tensor<string, []>("clip_0_cast_fp16")];
86
+ tensor<string, []> clip_0_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("clip_0_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
87
+ tensor<fp32, []> mel_epsilon_0 = const()[name = tensor<string, []>("mel_epsilon_0"), val = tensor<fp32, []>(0x1p-149)];
88
+ tensor<fp32, [?, 80]> clip_0_cast_fp16_to_fp32 = cast(dtype = clip_0_cast_fp16_to_fp32_dtype_0, x = clip_0_cast_fp16)[name = tensor<string, []>("cast_4")];
89
+ tensor<fp32, [?, 80]> mel = log(epsilon = mel_epsilon_0, x = clip_0_cast_fp16_to_fp32)[name = tensor<string, []>("mel")];
90
+ tensor<int32, [3]> concat_1x = const()[name = tensor<string, []>("concat_1x"), val = tensor<int32, [3]>([-1, 998, 80])];
91
+ tensor<string, []> mel_to_fp16_dtype_0 = const()[name = tensor<string, []>("mel_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
92
+ tensor<fp16, [?, 80]> mel_to_fp16 = cast(dtype = mel_to_fp16_dtype_0, x = mel)[name = tensor<string, []>("cast_3")];
93
+ tensor<fp16, [?, 998, 80]> var_157_cast_fp16 = reshape(shape = concat_1x, x = mel_to_fp16)[name = tensor<string, []>("op_157_cast_fp16")];
94
+ tensor<string, []> var_157_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_157_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
95
+ tensor<fp32, [?, 998, 80]> fbank = cast(dtype = var_157_cast_fp16_to_fp32_dtype_0, x = var_157_cast_fp16)[name = tensor<string, []>("cast_2")];
96
+ } -> (fbank);
97
+ }
FBank.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d992fbcd8d26540cfcb291d86417bf9bd2c94ac15295c8ff70b3b93ccd5158ed
3
+ size 888672