aufklarer commited on
Commit
745e9da
·
verified ·
1 Parent(s): d0595cb

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "numMelBins": 128,
3
+ "sampleRate": 16000,
4
+ "nFFT": 512,
5
+ "hopLength": 160,
6
+ "winLength": 400,
7
+ "preEmphasis": 0.97,
8
+ "encoderHidden": 1024,
9
+ "encoderLayers": 24,
10
+ "subsamplingFactor": 8,
11
+ "decoderHidden": 640,
12
+ "decoderLayers": 2,
13
+ "vocabSize": 8192,
14
+ "blankTokenId": 8192,
15
+ "numDurationBins": 5,
16
+ "durationBins": [
17
+ 0,
18
+ 1,
19
+ 2,
20
+ 3,
21
+ 4
22
+ ]
23
+ }
decoder.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26704839077423b097e5158bfd70ccb1fb08e9c9479830b94c38905923baab7d
3
+ size 243
decoder.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffe814e082d90c2f2da76d508550f55b9a29e49a25ef8a1ad77f18808b76f1f1
3
+ size 402
decoder.mlmodelc/metadata.json ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float16",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 1 × 1 × 640)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 640]",
13
+ "name" : "decoder_output",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 2 × 1 × 640)",
21
+ "shortDescription" : "",
22
+ "shape" : "[2, 1, 640]",
23
+ "name" : "h_out",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 2 × 1 × 640)",
31
+ "shortDescription" : "",
32
+ "shape" : "[2, 1, 640]",
33
+ "name" : "c_out",
34
+ "type" : "MultiArray"
35
+ }
36
+ ],
37
+ "modelParameters" : [
38
+
39
+ ],
40
+ "specificationVersion" : 8,
41
+ "mlProgramOperationTypeHistogram" : {
42
+ "Ios17.squeeze" : 4,
43
+ "Ios17.gather" : 1,
44
+ "Ios17.cast" : 1,
45
+ "Ios17.lstm" : 2,
46
+ "Split" : 2,
47
+ "Ios17.transpose" : 2,
48
+ "Stack" : 2
49
+ },
50
+ "computePrecision" : "Mixed (Float16, Int16, Int32)",
51
+ "isUpdatable" : "0",
52
+ "stateSchema" : [
53
+
54
+ ],
55
+ "availability" : {
56
+ "macOS" : "14.0",
57
+ "tvOS" : "17.0",
58
+ "visionOS" : "1.0",
59
+ "watchOS" : "10.0",
60
+ "iOS" : "17.0",
61
+ "macCatalyst" : "17.0"
62
+ },
63
+ "modelType" : {
64
+ "name" : "MLModelType_mlProgram"
65
+ },
66
+ "userDefinedMetadata" : {
67
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
68
+ "com.github.apple.coremltools.version" : "8.1",
69
+ "com.github.apple.coremltools.source" : "torch==2.10.0"
70
+ },
71
+ "inputSchema" : [
72
+ {
73
+ "hasShapeFlexibility" : "0",
74
+ "isOptional" : "0",
75
+ "dataType" : "Int32",
76
+ "formattedType" : "MultiArray (Int32 1 × 1)",
77
+ "shortDescription" : "",
78
+ "shape" : "[1, 1]",
79
+ "name" : "token",
80
+ "type" : "MultiArray"
81
+ },
82
+ {
83
+ "hasShapeFlexibility" : "0",
84
+ "isOptional" : "0",
85
+ "dataType" : "Float16",
86
+ "formattedType" : "MultiArray (Float16 2 × 1 × 640)",
87
+ "shortDescription" : "",
88
+ "shape" : "[2, 1, 640]",
89
+ "name" : "h",
90
+ "type" : "MultiArray"
91
+ },
92
+ {
93
+ "hasShapeFlexibility" : "0",
94
+ "isOptional" : "0",
95
+ "dataType" : "Float16",
96
+ "formattedType" : "MultiArray (Float16 2 × 1 × 640)",
97
+ "shortDescription" : "",
98
+ "shape" : "[2, 1, 640]",
99
+ "name" : "c",
100
+ "type" : "MultiArray"
101
+ }
102
+ ],
103
+ "generatedClassName" : "decoder",
104
+ "method" : "predict"
105
+ }
106
+ ]
decoder.mlmodelc/model.mil ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.0)
2
+ [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.10.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.1"}})]
3
+ {
4
+ func main<ios17>(tensor<fp16, [2, 1, 640]> c, tensor<fp16, [2, 1, 640]> h, tensor<int32, [1, 1]> token) {
5
+ tensor<int32, []> y_1_axis_0 = const()[name = tensor<string, []>("y_1_axis_0"), val = tensor<int32, []>(0)];
6
+ tensor<int32, []> y_1_batch_dims_0 = const()[name = tensor<string, []>("y_1_batch_dims_0"), val = tensor<int32, []>(0)];
7
+ tensor<bool, []> y_1_validate_indices_0 = const()[name = tensor<string, []>("y_1_validate_indices_0"), val = tensor<bool, []>(false)];
8
+ tensor<fp16, [8193, 640]> decoder_prediction_embed_weight_to_fp16 = const()[name = tensor<string, []>("decoder_prediction_embed_weight_to_fp16"), val = tensor<fp16, [8193, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
9
+ tensor<string, []> token_to_int16_dtype_0 = const()[name = tensor<string, []>("token_to_int16_dtype_0"), val = tensor<string, []>("int16")];
10
+ tensor<int16, [1, 1]> token_to_int16 = cast(dtype = token_to_int16_dtype_0, x = token)[name = tensor<string, []>("cast_6")];
11
+ tensor<fp16, [1, 1, 640]> y_1_cast_fp16_cast_uint16 = gather(axis = y_1_axis_0, batch_dims = y_1_batch_dims_0, indices = token_to_int16, validate_indices = y_1_validate_indices_0, x = decoder_prediction_embed_weight_to_fp16)[name = tensor<string, []>("y_1_cast_fp16_cast_uint16")];
12
+ tensor<int32, [3]> input_1_perm_0 = const()[name = tensor<string, []>("input_1_perm_0"), val = tensor<int32, [3]>([1, 0, 2])];
13
+ tensor<int32, []> split_0_num_splits_0 = const()[name = tensor<string, []>("split_0_num_splits_0"), val = tensor<int32, []>(2)];
14
+ tensor<int32, []> split_0_axis_0 = const()[name = tensor<string, []>("split_0_axis_0"), val = tensor<int32, []>(0)];
15
+ tensor<fp16, [1, 1, 640]> split_0_cast_fp16_0, tensor<fp16, [1, 1, 640]> split_0_cast_fp16_1 = split(axis = split_0_axis_0, num_splits = split_0_num_splits_0, x = h)[name = tensor<string, []>("split_0_cast_fp16")];
16
+ tensor<int32, []> split_1_num_splits_0 = const()[name = tensor<string, []>("split_1_num_splits_0"), val = tensor<int32, []>(2)];
17
+ tensor<int32, []> split_1_axis_0 = const()[name = tensor<string, []>("split_1_axis_0"), val = tensor<int32, []>(0)];
18
+ tensor<fp16, [1, 1, 640]> split_1_cast_fp16_0, tensor<fp16, [1, 1, 640]> split_1_cast_fp16_1 = split(axis = split_1_axis_0, num_splits = split_1_num_splits_0, x = c)[name = tensor<string, []>("split_1_cast_fp16")];
19
+ tensor<int32, [1]> input0_1_lstm_layer_0_lstm_h0_squeeze_axes_0 = const()[name = tensor<string, []>("input0_1_lstm_layer_0_lstm_h0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
20
+ tensor<fp16, [1, 640]> input0_1_lstm_layer_0_lstm_h0_squeeze_cast_fp16 = squeeze(axes = input0_1_lstm_layer_0_lstm_h0_squeeze_axes_0, x = split_0_cast_fp16_0)[name = tensor<string, []>("input0_1_lstm_layer_0_lstm_h0_squeeze_cast_fp16")];
21
+ tensor<int32, [1]> input0_1_lstm_layer_0_lstm_c0_squeeze_axes_0 = const()[name = tensor<string, []>("input0_1_lstm_layer_0_lstm_c0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
22
+ tensor<fp16, [1, 640]> input0_1_lstm_layer_0_lstm_c0_squeeze_cast_fp16 = squeeze(axes = input0_1_lstm_layer_0_lstm_c0_squeeze_axes_0, x = split_1_cast_fp16_0)[name = tensor<string, []>("input0_1_lstm_layer_0_lstm_c0_squeeze_cast_fp16")];
23
+ tensor<string, []> input0_1_lstm_layer_0_direction_0 = const()[name = tensor<string, []>("input0_1_lstm_layer_0_direction_0"), val = tensor<string, []>("forward")];
24
+ tensor<bool, []> input0_1_lstm_layer_0_output_sequence_0 = const()[name = tensor<string, []>("input0_1_lstm_layer_0_output_sequence_0"), val = tensor<bool, []>(true)];
25
+ tensor<string, []> input0_1_lstm_layer_0_recurrent_activation_0 = const()[name = tensor<string, []>("input0_1_lstm_layer_0_recurrent_activation_0"), val = tensor<string, []>("sigmoid")];
26
+ tensor<string, []> input0_1_lstm_layer_0_cell_activation_0 = const()[name = tensor<string, []>("input0_1_lstm_layer_0_cell_activation_0"), val = tensor<string, []>("tanh")];
27
+ tensor<string, []> input0_1_lstm_layer_0_activation_0 = const()[name = tensor<string, []>("input0_1_lstm_layer_0_activation_0"), val = tensor<string, []>("tanh")];
28
+ tensor<fp16, [2560, 640]> concat_1_to_fp16 = const()[name = tensor<string, []>("concat_1_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10487168)))];
29
+ tensor<fp16, [2560, 640]> concat_2_to_fp16 = const()[name = tensor<string, []>("concat_2_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13764032)))];
30
+ tensor<fp16, [2560]> concat_0_to_fp16 = const()[name = tensor<string, []>("concat_0_to_fp16"), val = tensor<fp16, [2560]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17040896)))];
31
+ tensor<fp16, [1, 1, 640]> input_1_cast_fp16 = transpose(perm = input_1_perm_0, x = y_1_cast_fp16_cast_uint16)[name = tensor<string, []>("transpose_1")];
32
+ tensor<fp16, [1, 1, 640]> input0_1_lstm_layer_0_cast_fp16_0, tensor<fp16, [1, 640]> input0_1_lstm_layer_0_cast_fp16_1, tensor<fp16, [1, 640]> input0_1_lstm_layer_0_cast_fp16_2 = lstm(activation = input0_1_lstm_layer_0_activation_0, bias = concat_0_to_fp16, cell_activation = input0_1_lstm_layer_0_cell_activation_0, direction = input0_1_lstm_layer_0_direction_0, initial_c = input0_1_lstm_layer_0_lstm_c0_squeeze_cast_fp16, initial_h = input0_1_lstm_layer_0_lstm_h0_squeeze_cast_fp16, output_sequence = input0_1_lstm_layer_0_output_sequence_0, recurrent_activation = input0_1_lstm_layer_0_recurrent_activation_0, weight_hh = concat_2_to_fp16, weight_ih = concat_1_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("input0_1_lstm_layer_0_cast_fp16")];
33
+ tensor<int32, [1]> input0_1_lstm_h0_squeeze_axes_0 = const()[name = tensor<string, []>("input0_1_lstm_h0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
34
+ tensor<fp16, [1, 640]> input0_1_lstm_h0_squeeze_cast_fp16 = squeeze(axes = input0_1_lstm_h0_squeeze_axes_0, x = split_0_cast_fp16_1)[name = tensor<string, []>("input0_1_lstm_h0_squeeze_cast_fp16")];
35
+ tensor<int32, [1]> input0_1_lstm_c0_squeeze_axes_0 = const()[name = tensor<string, []>("input0_1_lstm_c0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
36
+ tensor<fp16, [1, 640]> input0_1_lstm_c0_squeeze_cast_fp16 = squeeze(axes = input0_1_lstm_c0_squeeze_axes_0, x = split_1_cast_fp16_1)[name = tensor<string, []>("input0_1_lstm_c0_squeeze_cast_fp16")];
37
+ tensor<string, []> input0_1_direction_0 = const()[name = tensor<string, []>("input0_1_direction_0"), val = tensor<string, []>("forward")];
38
+ tensor<bool, []> input0_1_output_sequence_0 = const()[name = tensor<string, []>("input0_1_output_sequence_0"), val = tensor<bool, []>(true)];
39
+ tensor<string, []> input0_1_recurrent_activation_0 = const()[name = tensor<string, []>("input0_1_recurrent_activation_0"), val = tensor<string, []>("sigmoid")];
40
+ tensor<string, []> input0_1_cell_activation_0 = const()[name = tensor<string, []>("input0_1_cell_activation_0"), val = tensor<string, []>("tanh")];
41
+ tensor<string, []> input0_1_activation_0 = const()[name = tensor<string, []>("input0_1_activation_0"), val = tensor<string, []>("tanh")];
42
+ tensor<fp16, [2560, 640]> concat_4_to_fp16 = const()[name = tensor<string, []>("concat_4_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17046080)))];
43
+ tensor<fp16, [2560, 640]> concat_5_to_fp16 = const()[name = tensor<string, []>("concat_5_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20322944)))];
44
+ tensor<fp16, [2560]> concat_3_to_fp16 = const()[name = tensor<string, []>("concat_3_to_fp16"), val = tensor<fp16, [2560]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23599808)))];
45
+ tensor<fp16, [1, 1, 640]> input0_1_cast_fp16_0, tensor<fp16, [1, 640]> input0_1_cast_fp16_1, tensor<fp16, [1, 640]> input0_1_cast_fp16_2 = lstm(activation = input0_1_activation_0, bias = concat_3_to_fp16, cell_activation = input0_1_cell_activation_0, direction = input0_1_direction_0, initial_c = input0_1_lstm_c0_squeeze_cast_fp16, initial_h = input0_1_lstm_h0_squeeze_cast_fp16, output_sequence = input0_1_output_sequence_0, recurrent_activation = input0_1_recurrent_activation_0, weight_hh = concat_5_to_fp16, weight_ih = concat_4_to_fp16, x = input0_1_lstm_layer_0_cast_fp16_0)[name = tensor<string, []>("input0_1_cast_fp16")];
46
+ tensor<int32, []> var_33_axis_0 = const()[name = tensor<string, []>("op_33_axis_0"), val = tensor<int32, []>(0)];
47
+ tensor<fp16, [2, 1, 640]> h_out = stack(axis = var_33_axis_0, values = (input0_1_lstm_layer_0_cast_fp16_1, input0_1_cast_fp16_1))[name = tensor<string, []>("op_33_cast_fp16")];
48
+ tensor<int32, []> var_34_axis_0 = const()[name = tensor<string, []>("op_34_axis_0"), val = tensor<int32, []>(0)];
49
+ tensor<fp16, [2, 1, 640]> c_out = stack(axis = var_34_axis_0, values = (input0_1_lstm_layer_0_cast_fp16_2, input0_1_cast_fp16_2))[name = tensor<string, []>("op_34_cast_fp16")];
50
+ tensor<int32, [3]> var_44_perm_0 = const()[name = tensor<string, []>("op_44_perm_0"), val = tensor<int32, [3]>([1, 0, 2])];
51
+ tensor<fp16, [1, 1, 640]> decoder_output = transpose(perm = var_44_perm_0, x = input0_1_cast_fp16_0)[name = tensor<string, []>("transpose_0")];
52
+ } -> (decoder_output, h_out, c_out);
53
+ }
decoder.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48adf0f0d47c406c8253d4f7fef967436a39da14f5a65e66d5a4b407be355d41
3
+ size 23604992
encoder.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d93192620b57bb9f779dc153a2dad5e041f188163c13c54623598ab5136c3cff
3
+ size 243
encoder.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:280f7c3bb4c97a759eea3d1db143c178702a542bb43b9481ff4774feb1a4af0f
3
+ size 420
encoder.mlmodelc/metadata.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Palettized (8 bits))",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16)",
11
+ "shortDescription" : "",
12
+ "shape" : "[]",
13
+ "name" : "encoded",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Int32",
20
+ "formattedType" : "MultiArray (Int32 1)",
21
+ "shortDescription" : "",
22
+ "shape" : "[1]",
23
+ "name" : "encoded_length",
24
+ "type" : "MultiArray"
25
+ }
26
+ ],
27
+ "modelParameters" : [
28
+
29
+ ],
30
+ "specificationVersion" : 8,
31
+ "mlProgramOperationTypeHistogram" : {
32
+ "Range1d" : 5,
33
+ "Fill" : 1,
34
+ "Ios17.reshape" : 169,
35
+ "Ios17.logicalAnd" : 2,
36
+ "Ios16.softmax" : 24,
37
+ "Ios17.matmul" : 72,
38
+ "Ios17.transpose" : 195,
39
+ "Split" : 24,
40
+ "Ios17.expandDims" : 22,
41
+ "Select" : 72,
42
+ "Ios17.add" : 178,
43
+ "Tile" : 18,
44
+ "Ios17.sliceByIndex" : 51,
45
+ "Ios16.sigmoid" : 24,
46
+ "Ios17.squeeze" : 2,
47
+ "Shape" : 119,
48
+ "Ios17.gather" : 164,
49
+ "Ios17.logicalNot" : 2,
50
+ "Ios17.layerNorm" : 120,
51
+ "Pad" : 48,
52
+ "Ios17.less" : 5,
53
+ "Ios17.sub" : 5,
54
+ "Ios16.constexprLutToDense" : 295,
55
+ "Ios17.conv" : 77,
56
+ "Ios17.realDiv" : 17,
57
+ "Ios17.linear" : 217,
58
+ "Ios17.concat" : 144,
59
+ "Ios17.floorDiv" : 3,
60
+ "Ios16.relu" : 3,
61
+ "Ios17.cast" : 285,
62
+ "Ios16.silu" : 72,
63
+ "Ios17.mul" : 105
64
+ },
65
+ "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
66
+ "isUpdatable" : "0",
67
+ "stateSchema" : [
68
+
69
+ ],
70
+ "availability" : {
71
+ "macOS" : "14.0",
72
+ "tvOS" : "17.0",
73
+ "visionOS" : "1.0",
74
+ "watchOS" : "10.0",
75
+ "iOS" : "17.0",
76
+ "macCatalyst" : "17.0"
77
+ },
78
+ "modelType" : {
79
+ "name" : "MLModelType_mlProgram"
80
+ },
81
+ "userDefinedMetadata" : {
82
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
83
+ "com.github.apple.coremltools.source" : "torch==2.10.0",
84
+ "com.github.apple.coremltools.version" : "8.1"
85
+ },
86
+ "inputSchema" : [
87
+ {
88
+ "shortDescription" : "",
89
+ "dataType" : "Float32",
90
+ "hasShapeFlexibility" : "1",
91
+ "isOptional" : "0",
92
+ "shapeFlexibility" : "1 × 128 × 100 | 1 × 128 × 200 | 1 × 128 × 300 | 1 × 128 × 400 | 1 × 128 × 500 | 1 × 128 × 750 | 1 × 128 × 1000",
93
+ "formattedType" : "MultiArray (Float32 1 × 128 × 100)",
94
+ "type" : "MultiArray",
95
+ "shape" : "[1, 128, 100]",
96
+ "name" : "mel",
97
+ "enumeratedShapes" : "[[1, 128, 100], [1, 128, 200], [1, 128, 300], [1, 128, 400], [1, 128, 500], [1, 128, 750], [1, 128, 1000]]"
98
+ },
99
+ {
100
+ "hasShapeFlexibility" : "0",
101
+ "isOptional" : "0",
102
+ "dataType" : "Int32",
103
+ "formattedType" : "MultiArray (Int32 1)",
104
+ "shortDescription" : "",
105
+ "shape" : "[1]",
106
+ "name" : "length",
107
+ "type" : "MultiArray"
108
+ }
109
+ ],
110
+ "generatedClassName" : "encoder",
111
+ "method" : "predict"
112
+ }
113
+ ]
encoder.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
encoder.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ea8ea0f0199b1f808f8cc4c5b53f45f51595c2dfdac4a959a644988a6a311c4
3
+ size 619635392
joint.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c28d7854e47ae60503fe9691b3a7c30589bb27025cbdd3bcf05090ca058ab3e2
3
+ size 243
joint.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:937aa32b7a96fa9df9d09d619ca06662f75985cdcd29a7f3da40b46b0f592996
3
+ size 391
joint.mlmodelc/metadata.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float16",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8193)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 8193]",
13
+ "name" : "token_logits",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 1 × 5)",
21
+ "shortDescription" : "",
22
+ "shape" : "[1, 1, 5]",
23
+ "name" : "duration_logits",
24
+ "type" : "MultiArray"
25
+ }
26
+ ],
27
+ "modelParameters" : [
28
+
29
+ ],
30
+ "specificationVersion" : 8,
31
+ "mlProgramOperationTypeHistogram" : {
32
+ "Ios17.squeeze" : 1,
33
+ "Ios17.log" : 1,
34
+ "Ios17.linear" : 3,
35
+ "Ios17.add" : 1,
36
+ "Ios16.relu" : 1,
37
+ "Ios16.softmax" : 1,
38
+ "Ios17.sliceByIndex" : 2,
39
+ "Ios17.expandDims" : 2
40
+ },
41
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
42
+ "isUpdatable" : "0",
43
+ "stateSchema" : [
44
+
45
+ ],
46
+ "availability" : {
47
+ "macOS" : "14.0",
48
+ "tvOS" : "17.0",
49
+ "visionOS" : "1.0",
50
+ "watchOS" : "10.0",
51
+ "iOS" : "17.0",
52
+ "macCatalyst" : "17.0"
53
+ },
54
+ "modelType" : {
55
+ "name" : "MLModelType_mlProgram"
56
+ },
57
+ "userDefinedMetadata" : {
58
+ "com.github.apple.coremltools.version" : "8.1",
59
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
60
+ "com.github.apple.coremltools.source" : "torch==2.10.0"
61
+ },
62
+ "inputSchema" : [
63
+ {
64
+ "hasShapeFlexibility" : "0",
65
+ "isOptional" : "0",
66
+ "dataType" : "Float16",
67
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1024)",
68
+ "shortDescription" : "",
69
+ "shape" : "[1, 1, 1024]",
70
+ "name" : "encoder_output",
71
+ "type" : "MultiArray"
72
+ },
73
+ {
74
+ "hasShapeFlexibility" : "0",
75
+ "isOptional" : "0",
76
+ "dataType" : "Float16",
77
+ "formattedType" : "MultiArray (Float16 1 × 1 × 640)",
78
+ "shortDescription" : "",
79
+ "shape" : "[1, 1, 640]",
80
+ "name" : "decoder_output",
81
+ "type" : "MultiArray"
82
+ }
83
+ ],
84
+ "generatedClassName" : "joint",
85
+ "method" : "predict"
86
+ }
87
+ ]
joint.mlmodelc/model.mil ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.0)
2
+ [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.10.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.1"}})]
3
+ {
4
+ func main<ios17>(tensor<fp16, [1, 1, 640]> decoder_output, tensor<fp16, [1, 1, 1024]> encoder_output) {
5
+ tensor<int32, []> var_6 = const()[name = tensor<string, []>("op_6"), val = tensor<int32, []>(-1)];
6
+ tensor<fp16, [640, 1024]> joint_enc_weight_to_fp16 = const()[name = tensor<string, []>("joint_enc_weight_to_fp16"), val = tensor<fp16, [640, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
7
+ tensor<fp16, [640]> joint_enc_bias_to_fp16 = const()[name = tensor<string, []>("joint_enc_bias_to_fp16"), val = tensor<fp16, [640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1310848)))];
8
+ tensor<fp16, [1, 1, 640]> linear_0_cast_fp16 = linear(bias = joint_enc_bias_to_fp16, weight = joint_enc_weight_to_fp16, x = encoder_output)[name = tensor<string, []>("linear_0_cast_fp16")];
9
+ tensor<fp16, [640, 640]> joint_pred_weight_to_fp16 = const()[name = tensor<string, []>("joint_pred_weight_to_fp16"), val = tensor<fp16, [640, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1312192)))];
10
+ tensor<fp16, [640]> joint_pred_bias_to_fp16 = const()[name = tensor<string, []>("joint_pred_bias_to_fp16"), val = tensor<fp16, [640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2131456)))];
11
+ tensor<fp16, [1, 1, 640]> linear_1_cast_fp16 = linear(bias = joint_pred_bias_to_fp16, weight = joint_pred_weight_to_fp16, x = decoder_output)[name = tensor<string, []>("linear_1_cast_fp16")];
12
+ tensor<int32, [1]> f_3_axes_0 = const()[name = tensor<string, []>("f_3_axes_0"), val = tensor<int32, [1]>([2])];
13
+ tensor<fp16, [1, 1, 1, 640]> f_3_cast_fp16 = expand_dims(axes = f_3_axes_0, x = linear_0_cast_fp16)[name = tensor<string, []>("f_3_cast_fp16")];
14
+ tensor<int32, [1]> g_3_axes_0 = const()[name = tensor<string, []>("g_3_axes_0"), val = tensor<int32, [1]>([1])];
15
+ tensor<fp16, [1, 1, 1, 640]> g_3_cast_fp16 = expand_dims(axes = g_3_axes_0, x = linear_1_cast_fp16)[name = tensor<string, []>("g_3_cast_fp16")];
16
+ tensor<fp16, [1, 1, 1, 640]> input_3_cast_fp16 = add(x = f_3_cast_fp16, y = g_3_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
17
+ tensor<fp16, [1, 1, 1, 640]> var_28_cast_fp16 = relu(x = input_3_cast_fp16)[name = tensor<string, []>("op_28_cast_fp16")];
18
+ tensor<fp16, [8198, 640]> joint_joint_net_2_weight_to_fp16 = const()[name = tensor<string, []>("joint_joint_net_2_weight_to_fp16"), val = tensor<fp16, [8198, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2132800)))];
19
+ tensor<fp16, [8198]> joint_joint_net_2_bias_to_fp16 = const()[name = tensor<string, []>("joint_joint_net_2_bias_to_fp16"), val = tensor<fp16, [8198]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12626304)))];
20
+ tensor<fp16, [1, 1, 1, 8198]> linear_2_cast_fp16 = linear(bias = joint_joint_net_2_bias_to_fp16, weight = joint_joint_net_2_weight_to_fp16, x = var_28_cast_fp16)[name = tensor<string, []>("linear_2_cast_fp16")];
21
+ tensor<fp16, [1, 1, 1, 8198]> combined_1_softmax_cast_fp16 = softmax(axis = var_6, x = linear_2_cast_fp16)[name = tensor<string, []>("combined_1_softmax_cast_fp16")];
22
+ tensor<fp32, []> combined_1_epsilon_0 = const()[name = tensor<string, []>("combined_1_epsilon_0"), val = tensor<fp32, []>(0x1p-149)];
23
+ tensor<fp16, [1, 1, 1, 8198]> combined_1_cast_fp16 = log(epsilon = combined_1_epsilon_0, x = combined_1_softmax_cast_fp16)[name = tensor<string, []>("combined_1_cast_fp16")];
24
+ tensor<int32, [1]> combined0_1_axes_0 = const()[name = tensor<string, []>("combined0_1_axes_0"), val = tensor<int32, [1]>([2])];
25
+ tensor<fp16, [1, 1, 8198]> combined0_1_cast_fp16 = squeeze(axes = combined0_1_axes_0, x = combined_1_cast_fp16)[name = tensor<string, []>("combined0_1_cast_fp16")];
26
+ tensor<int32, [3]> var_35_begin_0 = const()[name = tensor<string, []>("op_35_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
27
+ tensor<int32, [3]> var_35_end_0 = const()[name = tensor<string, []>("op_35_end_0"), val = tensor<int32, [3]>([1, 1, 8193])];
28
+ tensor<bool, [3]> var_35_end_mask_0 = const()[name = tensor<string, []>("op_35_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
29
+ tensor<fp16, [1, 1, 8193]> token_logits = slice_by_index(begin = var_35_begin_0, end = var_35_end_0, end_mask = var_35_end_mask_0, x = combined0_1_cast_fp16)[name = tensor<string, []>("op_35_cast_fp16")];
30
+ tensor<int32, [3]> var_36_begin_0 = const()[name = tensor<string, []>("op_36_begin_0"), val = tensor<int32, [3]>([0, 0, 8193])];
31
+ tensor<int32, [3]> var_36_end_0 = const()[name = tensor<string, []>("op_36_end_0"), val = tensor<int32, [3]>([1, 1, 8198])];
32
+ tensor<bool, [3]> var_36_end_mask_0 = const()[name = tensor<string, []>("op_36_end_mask_0"), val = tensor<bool, [3]>([true, true, true])];
33
+ tensor<fp16, [1, 1, 5]> duration_logits = slice_by_index(begin = var_36_begin_0, end = var_36_end_0, end_mask = var_36_end_mask_0, x = combined0_1_cast_fp16)[name = tensor<string, []>("op_36_cast_fp16")];
34
+ } -> (token_logits, duration_logits);
35
+ }
joint.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e0e63d840032f7f07ddb1d64446051166281e5491bf22da8a945c41f6eedb3e
3
+ size 12642764
vocab.json ADDED
The diff for this file is too large to render. See raw diff