alexwengg commited on
Commit
accdafd
·
verified ·
1 Parent(s): 7966d1c

Upload 9 files

Browse files
CtcHead.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bb2d397129247a478fce10652d3011e155cf0247e93086bcdc384a754f005d5
3
+ size 243
CtcHead.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14df3ea46b298ac3efd8141b7dcab77acdaf2d0c700827bc1435093744ae206d
3
+ size 488
CtcHead.mlmodelc/metadata.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "shortDescription" : "CTC decoder head for parakeet-tdt-ctc-110m (encoder_dim=512, vocab=1024+1 blank)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 188 × 1025)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 188, 1025]",
13
+ "name" : "ctc_logits",
14
+ "type" : "MultiArray"
15
+ }
16
+ ],
17
+ "storagePrecision" : "Float16",
18
+ "modelParameters" : [
19
+
20
+ ],
21
+ "author" : "Fluid Inference",
22
+ "specificationVersion" : 8,
23
+ "mlProgramOperationTypeHistogram" : {
24
+ "Ios17.cast" : 2,
25
+ "Ios17.conv" : 1,
26
+ "Ios17.transpose" : 1,
27
+ "Ios16.softmax" : 1,
28
+ "Ios17.log" : 1
29
+ },
30
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
31
+ "isUpdatable" : "0",
32
+ "stateSchema" : [
33
+
34
+ ],
35
+ "availability" : {
36
+ "macOS" : "14.0",
37
+ "tvOS" : "17.0",
38
+ "visionOS" : "1.0",
39
+ "watchOS" : "10.0",
40
+ "iOS" : "17.0",
41
+ "macCatalyst" : "17.0"
42
+ },
43
+ "modelType" : {
44
+ "name" : "MLModelType_mlProgram"
45
+ },
46
+ "inputSchema" : [
47
+ {
48
+ "hasShapeFlexibility" : "0",
49
+ "isOptional" : "0",
50
+ "dataType" : "Float32",
51
+ "formattedType" : "MultiArray (Float32 1 × 512 × 188)",
52
+ "shortDescription" : "",
53
+ "shape" : "[1, 512, 188]",
54
+ "name" : "encoder_output",
55
+ "type" : "MultiArray"
56
+ }
57
+ ],
58
+ "userDefinedMetadata" : {
59
+ "com.github.apple.coremltools.conversion_date" : "2026-03-28",
60
+ "com.github.apple.coremltools.source" : "torch==2.7.0",
61
+ "com.github.apple.coremltools.version" : "9.0b1",
62
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
63
+ },
64
+ "generatedClassName" : "CtcHead",
65
+ "method" : "predict"
66
+ }
67
+ ]
CtcHead.mlmodelc/model.mil ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.0)
2
+ [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.7.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0b1"}})]
3
+ {
4
+ func main<ios17>(tensor<fp32, [1, 512, 188]> encoder_output) {
5
+ tensor<int32, []> var_4 = const()[name = tensor<string, []>("op_4"), val = tensor<int32, []>(-1)];
6
+ tensor<string, []> var_18_pad_type_0 = const()[name = tensor<string, []>("op_18_pad_type_0"), val = tensor<string, []>("valid")];
7
+ tensor<int32, [1]> var_18_strides_0 = const()[name = tensor<string, []>("op_18_strides_0"), val = tensor<int32, [1]>([1])];
8
+ tensor<int32, [2]> var_18_pad_0 = const()[name = tensor<string, []>("op_18_pad_0"), val = tensor<int32, [2]>([0, 0])];
9
+ tensor<int32, [1]> var_18_dilations_0 = const()[name = tensor<string, []>("op_18_dilations_0"), val = tensor<int32, [1]>([1])];
10
+ tensor<int32, []> var_18_groups_0 = const()[name = tensor<string, []>("op_18_groups_0"), val = tensor<int32, []>(1)];
11
+ tensor<string, []> encoder_output_to_fp16_dtype_0 = const()[name = tensor<string, []>("encoder_output_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
12
+ tensor<fp16, [1025, 512, 1]> module_decoder_layers_0_weight_to_fp16 = const()[name = tensor<string, []>("module_decoder_layers_0_weight_to_fp16"), val = tensor<fp16, [1025, 512, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
13
+ tensor<fp16, [1025]> module_decoder_layers_0_bias_to_fp16 = const()[name = tensor<string, []>("module_decoder_layers_0_bias_to_fp16"), val = tensor<fp16, [1025]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1049728)))];
14
+ tensor<fp16, [1, 512, 188]> encoder_output_to_fp16 = cast(dtype = encoder_output_to_fp16_dtype_0, x = encoder_output)[name = tensor<string, []>("cast_1")];
15
+ tensor<fp16, [1, 1025, 188]> var_18_cast_fp16 = conv(bias = module_decoder_layers_0_bias_to_fp16, dilations = var_18_dilations_0, groups = var_18_groups_0, pad = var_18_pad_0, pad_type = var_18_pad_type_0, strides = var_18_strides_0, weight = module_decoder_layers_0_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor<string, []>("op_18_cast_fp16")];
16
+ tensor<int32, [3]> input_perm_0 = const()[name = tensor<string, []>("input_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
17
+ tensor<fp16, [1, 188, 1025]> input_cast_fp16 = transpose(perm = input_perm_0, x = var_18_cast_fp16)[name = tensor<string, []>("transpose_0")];
18
+ tensor<fp16, [1, 188, 1025]> out_objects_softmax_cast_fp16 = softmax(axis = var_4, x = input_cast_fp16)[name = tensor<string, []>("out_objects_softmax_cast_fp16")];
19
+ tensor<fp32, []> out_objects_epsilon_0 = const()[name = tensor<string, []>("out_objects_epsilon_0"), val = tensor<fp32, []>(0x1p-149)];
20
+ tensor<fp16, [1, 188, 1025]> out_objects_cast_fp16 = log(epsilon = out_objects_epsilon_0, x = out_objects_softmax_cast_fp16)[name = tensor<string, []>("out_objects_cast_fp16")];
21
+ tensor<string, []> out_objects_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("out_objects_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
22
+ tensor<fp32, [1, 188, 1025]> ctc_logits = cast(dtype = out_objects_cast_fp16_to_fp32_dtype_0, x = out_objects_cast_fp16)[name = tensor<string, []>("cast_0")];
23
+ } -> (ctc_logits);
24
+ }
CtcHead.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb9bead064427ffcb7529c0e3f378e421b4dde8e6d81447b6d1ca3352ca850e1
3
+ size 1051842
CtcHead.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de3082e4ab3e934567431b370713514d72e6355e284cae0cda3c8e80cad6fe11
3
+ size 3477
CtcHead.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb9bead064427ffcb7529c0e3f378e421b4dde8e6d81447b6d1ca3352ca850e1
3
+ size 1051842
CtcHead.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "3FA85FCF-F3EE-4BA4-B9C9-562CE6B08C20": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Specification",
7
+ "name": "model.mlmodel",
8
+ "path": "com.apple.CoreML/model.mlmodel"
9
+ },
10
+ "AC32BF24-5F07-4CB2-AED5-C6E41D323170": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Weights",
13
+ "name": "weights",
14
+ "path": "com.apple.CoreML/weights"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "3FA85FCF-F3EE-4BA4-B9C9-562CE6B08C20"
18
+ }
ctc_head_metadata.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "parakeet-tdt-ctc-110m-ctc-head",
3
+ "source": "nvidia/parakeet-tdt_ctc-110m",
4
+ "encoder_dim": 512,
5
+ "time_steps": 188,
6
+ "vocab_size": 1024,
7
+ "ctc_classes": 1025,
8
+ "blank_id": 1024,
9
+ "max_audio_seconds": 15.0,
10
+ "input": {
11
+ "encoder_output": [
12
+ 1,
13
+ 512,
14
+ 188
15
+ ]
16
+ },
17
+ "output": {
18
+ "ctc_logits": [
19
+ 1,
20
+ 188,
21
+ 1025
22
+ ]
23
+ }
24
+ }