aufklarer committed 26 days ago

Commit

745e9da

verified ·

1 Parent(s): d0595cb

Upload folder using huggingface_hub

Browse files

Files changed (17) hide show

config.json +23 -0
decoder.mlmodelc/analytics/coremldata.bin +3 -0
decoder.mlmodelc/coremldata.bin +3 -0
decoder.mlmodelc/metadata.json +106 -0
decoder.mlmodelc/model.mil +53 -0
decoder.mlmodelc/weights/weight.bin +3 -0
encoder.mlmodelc/analytics/coremldata.bin +3 -0
encoder.mlmodelc/coremldata.bin +3 -0
encoder.mlmodelc/metadata.json +113 -0
encoder.mlmodelc/model.mil +0 -0
encoder.mlmodelc/weights/weight.bin +3 -0
joint.mlmodelc/analytics/coremldata.bin +3 -0
joint.mlmodelc/coremldata.bin +3 -0
joint.mlmodelc/metadata.json +87 -0
joint.mlmodelc/model.mil +35 -0
joint.mlmodelc/weights/weight.bin +3 -0
vocab.json +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "numMelBins": 128,
+  "sampleRate": 16000,
+  "nFFT": 512,
+  "hopLength": 160,
+  "winLength": 400,
+  "preEmphasis": 0.97,
+  "encoderHidden": 1024,
+  "encoderLayers": 24,
+  "subsamplingFactor": 8,
+  "decoderHidden": 640,
+  "decoderLayers": 2,
+  "vocabSize": 8192,
+  "blankTokenId": 8192,
+  "numDurationBins": 5,
+  "durationBins": [
+    0,
+    1,
+    2,
+    3,
+    4
+  ]
+}

decoder.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:26704839077423b097e5158bfd70ccb1fb08e9c9479830b94c38905923baab7d
+size 243

decoder.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ffe814e082d90c2f2da76d508550f55b9a29e49a25ef8a1ad77f18808b76f1f1
+size 402

decoder.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,106 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 640)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 640]",
+        "name" : "decoder_output",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 2 × 1 × 640)",
+        "shortDescription" : "",
+        "shape" : "[2, 1, 640]",
+        "name" : "h_out",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 2 × 1 × 640)",
+        "shortDescription" : "",
+        "shape" : "[2, 1, 640]",
+        "name" : "c_out",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 8,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios17.squeeze" : 4,
+      "Ios17.gather" : 1,
+      "Ios17.cast" : 1,
+      "Ios17.lstm" : 2,
+      "Split" : 2,
+      "Ios17.transpose" : 2,
+      "Stack" : 2
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "14.0",
+      "tvOS" : "17.0",
+      "visionOS" : "1.0",
+      "watchOS" : "10.0",
+      "iOS" : "17.0",
+      "macCatalyst" : "17.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.version" : "8.1",
+      "com.github.apple.coremltools.source" : "torch==2.10.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 1]",
+        "name" : "token",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 2 × 1 × 640)",
+        "shortDescription" : "",
+        "shape" : "[2, 1, 640]",
+        "name" : "h",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 2 × 1 × 640)",
+        "shortDescription" : "",
+        "shape" : "[2, 1, 640]",
+        "name" : "c",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "decoder",
+    "method" : "predict"
+  }
+]

decoder.mlmodelc/model.mil ADDED Viewed

	@@ -0,0 +1,53 @@

+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.10.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.1"}})]
+{
+    func main<ios17>(tensor<fp16, [2, 1, 640]> c, tensor<fp16, [2, 1, 640]> h, tensor<int32, [1, 1]> token) {
+            tensor<int32, []> y_1_axis_0 = const()[name = tensor<string, []>("y_1_axis_0"), val = tensor<int32, []>(0)];
+            tensor<int32, []> y_1_batch_dims_0 = const()[name = tensor<string, []>("y_1_batch_dims_0"), val = tensor<int32, []>(0)];
+            tensor<bool, []> y_1_validate_indices_0 = const()[name = tensor<string, []>("y_1_validate_indices_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [8193, 640]> decoder_prediction_embed_weight_to_fp16 = const()[name = tensor<string, []>("decoder_prediction_embed_weight_to_fp16"), val = tensor<fp16, [8193, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<string, []> token_to_int16_dtype_0 = const()[name = tensor<string, []>("token_to_int16_dtype_0"), val = tensor<string, []>("int16")];
+            tensor<int16, [1, 1]> token_to_int16 = cast(dtype = token_to_int16_dtype_0, x = token)[name = tensor<string, []>("cast_6")];
+            tensor<fp16, [1, 1, 640]> y_1_cast_fp16_cast_uint16 = gather(axis = y_1_axis_0, batch_dims = y_1_batch_dims_0, indices = token_to_int16, validate_indices = y_1_validate_indices_0, x = decoder_prediction_embed_weight_to_fp16)[name = tensor<string, []>("y_1_cast_fp16_cast_uint16")];
+            tensor<int32, [3]> input_1_perm_0 = const()[name = tensor<string, []>("input_1_perm_0"), val = tensor<int32, [3]>([1, 0, 2])];
+            tensor<int32, []> split_0_num_splits_0 = const()[name = tensor<string, []>("split_0_num_splits_0"), val = tensor<int32, []>(2)];
+            tensor<int32, []> split_0_axis_0 = const()[name = tensor<string, []>("split_0_axis_0"), val = tensor<int32, []>(0)];
+            tensor<fp16, [1, 1, 640]> split_0_cast_fp16_0, tensor<fp16, [1, 1, 640]> split_0_cast_fp16_1 = split(axis = split_0_axis_0, num_splits = split_0_num_splits_0, x = h)[name = tensor<string, []>("split_0_cast_fp16")];
+            tensor<int32, []> split_1_num_splits_0 = const()[name = tensor<string, []>("split_1_num_splits_0"), val = tensor<int32, []>(2)];
+            tensor<int32, []> split_1_axis_0 = const()[name = tensor<string, []>("split_1_axis_0"), val = tensor<int32, []>(0)];
+            tensor<fp16, [1, 1, 640]> split_1_cast_fp16_0, tensor<fp16, [1, 1, 640]> split_1_cast_fp16_1 = split(axis = split_1_axis_0, num_splits = split_1_num_splits_0, x = c)[name = tensor<string, []>("split_1_cast_fp16")];
+            tensor<int32, [1]> input0_1_lstm_layer_0_lstm_h0_squeeze_axes_0 = const()[name = tensor<string, []>("input0_1_lstm_layer_0_lstm_h0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1, 640]> input0_1_lstm_layer_0_lstm_h0_squeeze_cast_fp16 = squeeze(axes = input0_1_lstm_layer_0_lstm_h0_squeeze_axes_0, x = split_0_cast_fp16_0)[name = tensor<string, []>("input0_1_lstm_layer_0_lstm_h0_squeeze_cast_fp16")];
+            tensor<int32, [1]> input0_1_lstm_layer_0_lstm_c0_squeeze_axes_0 = const()[name = tensor<string, []>("input0_1_lstm_layer_0_lstm_c0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1, 640]> input0_1_lstm_layer_0_lstm_c0_squeeze_cast_fp16 = squeeze(axes = input0_1_lstm_layer_0_lstm_c0_squeeze_axes_0, x = split_1_cast_fp16_0)[name = tensor<string, []>("input0_1_lstm_layer_0_lstm_c0_squeeze_cast_fp16")];
+            tensor<string, []> input0_1_lstm_layer_0_direction_0 = const()[name = tensor<string, []>("input0_1_lstm_layer_0_direction_0"), val = tensor<string, []>("forward")];
+            tensor<bool, []> input0_1_lstm_layer_0_output_sequence_0 = const()[name = tensor<string, []>("input0_1_lstm_layer_0_output_sequence_0"), val = tensor<bool, []>(true)];
+            tensor<string, []> input0_1_lstm_layer_0_recurrent_activation_0 = const()[name = tensor<string, []>("input0_1_lstm_layer_0_recurrent_activation_0"), val = tensor<string, []>("sigmoid")];
+            tensor<string, []> input0_1_lstm_layer_0_cell_activation_0 = const()[name = tensor<string, []>("input0_1_lstm_layer_0_cell_activation_0"), val = tensor<string, []>("tanh")];
+            tensor<string, []> input0_1_lstm_layer_0_activation_0 = const()[name = tensor<string, []>("input0_1_lstm_layer_0_activation_0"), val = tensor<string, []>("tanh")];
+            tensor<fp16, [2560, 640]> concat_1_to_fp16 = const()[name = tensor<string, []>("concat_1_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10487168)))];
+            tensor<fp16, [2560, 640]> concat_2_to_fp16 = const()[name = tensor<string, []>("concat_2_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13764032)))];
+            tensor<fp16, [2560]> concat_0_to_fp16 = const()[name = tensor<string, []>("concat_0_to_fp16"), val = tensor<fp16, [2560]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17040896)))];
+            tensor<fp16, [1, 1, 640]> input_1_cast_fp16 = transpose(perm = input_1_perm_0, x = y_1_cast_fp16_cast_uint16)[name = tensor<string, []>("transpose_1")];
+            tensor<fp16, [1, 1, 640]> input0_1_lstm_layer_0_cast_fp16_0, tensor<fp16, [1, 640]> input0_1_lstm_layer_0_cast_fp16_1, tensor<fp16, [1, 640]> input0_1_lstm_layer_0_cast_fp16_2 = lstm(activation = input0_1_lstm_layer_0_activation_0, bias = concat_0_to_fp16, cell_activation = input0_1_lstm_layer_0_cell_activation_0, direction = input0_1_lstm_layer_0_direction_0, initial_c = input0_1_lstm_layer_0_lstm_c0_squeeze_cast_fp16, initial_h = input0_1_lstm_layer_0_lstm_h0_squeeze_cast_fp16, output_sequence = input0_1_lstm_layer_0_output_sequence_0, recurrent_activation = input0_1_lstm_layer_0_recurrent_activation_0, weight_hh = concat_2_to_fp16, weight_ih = concat_1_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("input0_1_lstm_layer_0_cast_fp16")];
+            tensor<int32, [1]> input0_1_lstm_h0_squeeze_axes_0 = const()[name = tensor<string, []>("input0_1_lstm_h0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1, 640]> input0_1_lstm_h0_squeeze_cast_fp16 = squeeze(axes = input0_1_lstm_h0_squeeze_axes_0, x = split_0_cast_fp16_1)[name = tensor<string, []>("input0_1_lstm_h0_squeeze_cast_fp16")];
+            tensor<int32, [1]> input0_1_lstm_c0_squeeze_axes_0 = const()[name = tensor<string, []>("input0_1_lstm_c0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1, 640]> input0_1_lstm_c0_squeeze_cast_fp16 = squeeze(axes = input0_1_lstm_c0_squeeze_axes_0, x = split_1_cast_fp16_1)[name = tensor<string, []>("input0_1_lstm_c0_squeeze_cast_fp16")];
+            tensor<string, []> input0_1_direction_0 = const()[name = tensor<string, []>("input0_1_direction_0"), val = tensor<string, []>("forward")];
+            tensor<bool, []> input0_1_output_sequence_0 = const()[name = tensor<string, []>("input0_1_output_sequence_0"), val = tensor<bool, []>(true)];
+            tensor<string, []> input0_1_recurrent_activation_0 = const()[name = tensor<string, []>("input0_1_recurrent_activation_0"), val = tensor<string, []>("sigmoid")];
+            tensor<string, []> input0_1_cell_activation_0 = const()[name = tensor<string, []>("input0_1_cell_activation_0"), val = tensor<string, []>("tanh")];
+            tensor<string, []> input0_1_activation_0 = const()[name = tensor<string, []>("input0_1_activation_0"), val = tensor<string, []>("tanh")];
+            tensor<fp16, [2560, 640]> concat_4_to_fp16 = const()[name = tensor<string, []>("concat_4_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17046080)))];
+            tensor<fp16, [2560, 640]> concat_5_to_fp16 = const()[name = tensor<string, []>("concat_5_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20322944)))];
+            tensor<fp16, [2560]> concat_3_to_fp16 = const()[name = tensor<string, []>("concat_3_to_fp16"), val = tensor<fp16, [2560]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23599808)))];
+            tensor<fp16, [1, 1, 640]> input0_1_cast_fp16_0, tensor<fp16, [1, 640]> input0_1_cast_fp16_1, tensor<fp16, [1, 640]> input0_1_cast_fp16_2 = lstm(activation = input0_1_activation_0, bias = concat_3_to_fp16, cell_activation = input0_1_cell_activation_0, direction = input0_1_direction_0, initial_c = input0_1_lstm_c0_squeeze_cast_fp16, initial_h = input0_1_lstm_h0_squeeze_cast_fp16, output_sequence = input0_1_output_sequence_0, recurrent_activation = input0_1_recurrent_activation_0, weight_hh = concat_5_to_fp16, weight_ih = concat_4_to_fp16, x = input0_1_lstm_layer_0_cast_fp16_0)[name = tensor<string, []>("input0_1_cast_fp16")];
+            tensor<int32, []> var_33_axis_0 = const()[name = tensor<string, []>("op_33_axis_0"), val = tensor<int32, []>(0)];
+            tensor<fp16, [2, 1, 640]> h_out = stack(axis = var_33_axis_0, values = (input0_1_lstm_layer_0_cast_fp16_1, input0_1_cast_fp16_1))[name = tensor<string, []>("op_33_cast_fp16")];
+            tensor<int32, []> var_34_axis_0 = const()[name = tensor<string, []>("op_34_axis_0"), val = tensor<int32, []>(0)];
+            tensor<fp16, [2, 1, 640]> c_out = stack(axis = var_34_axis_0, values = (input0_1_lstm_layer_0_cast_fp16_2, input0_1_cast_fp16_2))[name = tensor<string, []>("op_34_cast_fp16")];
+            tensor<int32, [3]> var_44_perm_0 = const()[name = tensor<string, []>("op_44_perm_0"), val = tensor<int32, [3]>([1, 0, 2])];
+            tensor<fp16, [1, 1, 640]> decoder_output = transpose(perm = var_44_perm_0, x = input0_1_cast_fp16_0)[name = tensor<string, []>("transpose_0")];
+        } -> (decoder_output, h_out, c_out);
+}

decoder.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48adf0f0d47c406c8253d4f7fef967436a39da14f5a65e66d5a4b407be355d41
+size 23604992

encoder.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d93192620b57bb9f779dc153a2dad5e041f188163c13c54623598ab5136c3cff
+size 243

encoder.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:280f7c3bb4c97a759eea3d1db143c178702a542bb43b9481ff4774feb1a4af0f
+size 420

encoder.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,113 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float16, Palettized (8 bits))",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16)",
+        "shortDescription" : "",
+        "shape" : "[]",
+        "name" : "encoded",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "encoded_length",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 8,
+    "mlProgramOperationTypeHistogram" : {
+      "Range1d" : 5,
+      "Fill" : 1,
+      "Ios17.reshape" : 169,
+      "Ios17.logicalAnd" : 2,
+      "Ios16.softmax" : 24,
+      "Ios17.matmul" : 72,
+      "Ios17.transpose" : 195,
+      "Split" : 24,
+      "Ios17.expandDims" : 22,
+      "Select" : 72,
+      "Ios17.add" : 178,
+      "Tile" : 18,
+      "Ios17.sliceByIndex" : 51,
+      "Ios16.sigmoid" : 24,
+      "Ios17.squeeze" : 2,
+      "Shape" : 119,
+      "Ios17.gather" : 164,
+      "Ios17.logicalNot" : 2,
+      "Ios17.layerNorm" : 120,
+      "Pad" : 48,
+      "Ios17.less" : 5,
+      "Ios17.sub" : 5,
+      "Ios16.constexprLutToDense" : 295,
+      "Ios17.conv" : 77,
+      "Ios17.realDiv" : 17,
+      "Ios17.linear" : 217,
+      "Ios17.concat" : 144,
+      "Ios17.floorDiv" : 3,
+      "Ios16.relu" : 3,
+      "Ios17.cast" : 285,
+      "Ios16.silu" : 72,
+      "Ios17.mul" : 105
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "14.0",
+      "tvOS" : "17.0",
+      "visionOS" : "1.0",
+      "watchOS" : "10.0",
+      "iOS" : "17.0",
+      "macCatalyst" : "17.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.10.0",
+      "com.github.apple.coremltools.version" : "8.1"
+    },
+    "inputSchema" : [
+      {
+        "shortDescription" : "",
+        "dataType" : "Float32",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 128 × 100 | 1 × 128 × 200 | 1 × 128 × 300 | 1 × 128 × 400 | 1 × 128 × 500 | 1 × 128 × 750 | 1 × 128 × 1000",
+        "formattedType" : "MultiArray (Float32 1 × 128 × 100)",
+        "type" : "MultiArray",
+        "shape" : "[1, 128, 100]",
+        "name" : "mel",
+        "enumeratedShapes" : "[[1, 128, 100], [1, 128, 200], [1, 128, 300], [1, 128, 400], [1, 128, 500], [1, 128, 750], [1, 128, 1000]]"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "length",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "encoder",
+    "method" : "predict"
+  }
+]

encoder.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

encoder.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ea8ea0f0199b1f808f8cc4c5b53f45f51595c2dfdac4a959a644988a6a311c4
+size 619635392

joint.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c28d7854e47ae60503fe9691b3a7c30589bb27025cbdd3bcf05090ca058ab3e2
+size 243

joint.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:937aa32b7a96fa9df9d09d619ca06662f75985cdcd29a7f3da40b46b0f592996
+size 391

joint.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,87 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 8193)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 8193]",
+        "name" : "token_logits",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 5)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 5]",
+        "name" : "duration_logits",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 8,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios17.squeeze" : 1,
+      "Ios17.log" : 1,
+      "Ios17.linear" : 3,
+      "Ios17.add" : 1,
+      "Ios16.relu" : 1,
+      "Ios16.softmax" : 1,
+      "Ios17.sliceByIndex" : 2,
+      "Ios17.expandDims" : 2
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "14.0",
+      "tvOS" : "17.0",
+      "visionOS" : "1.0",
+      "watchOS" : "10.0",
+      "iOS" : "17.0",
+      "macCatalyst" : "17.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.version" : "8.1",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.10.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1024)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 1024]",
+        "name" : "encoder_output",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 640)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 640]",
+        "name" : "decoder_output",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "joint",
+    "method" : "predict"
+  }
+]

joint.mlmodelc/model.mil ADDED Viewed

	@@ -0,0 +1,35 @@

+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.10.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.1"}})]
+{
+    func main<ios17>(tensor<fp16, [1, 1, 640]> decoder_output, tensor<fp16, [1, 1, 1024]> encoder_output) {
+            tensor<int32, []> var_6 = const()[name = tensor<string, []>("op_6"), val = tensor<int32, []>(-1)];
+            tensor<fp16, [640, 1024]> joint_enc_weight_to_fp16 = const()[name = tensor<string, []>("joint_enc_weight_to_fp16"), val = tensor<fp16, [640, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [640]> joint_enc_bias_to_fp16 = const()[name = tensor<string, []>("joint_enc_bias_to_fp16"), val = tensor<fp16, [640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1310848)))];
+            tensor<fp16, [1, 1, 640]> linear_0_cast_fp16 = linear(bias = joint_enc_bias_to_fp16, weight = joint_enc_weight_to_fp16, x = encoder_output)[name = tensor<string, []>("linear_0_cast_fp16")];
+            tensor<fp16, [640, 640]> joint_pred_weight_to_fp16 = const()[name = tensor<string, []>("joint_pred_weight_to_fp16"), val = tensor<fp16, [640, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1312192)))];
+            tensor<fp16, [640]> joint_pred_bias_to_fp16 = const()[name = tensor<string, []>("joint_pred_bias_to_fp16"), val = tensor<fp16, [640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2131456)))];
+            tensor<fp16, [1, 1, 640]> linear_1_cast_fp16 = linear(bias = joint_pred_bias_to_fp16, weight = joint_pred_weight_to_fp16, x = decoder_output)[name = tensor<string, []>("linear_1_cast_fp16")];
+            tensor<int32, [1]> f_3_axes_0 = const()[name = tensor<string, []>("f_3_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 1, 640]> f_3_cast_fp16 = expand_dims(axes = f_3_axes_0, x = linear_0_cast_fp16)[name = tensor<string, []>("f_3_cast_fp16")];
+            tensor<int32, [1]> g_3_axes_0 = const()[name = tensor<string, []>("g_3_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 640]> g_3_cast_fp16 = expand_dims(axes = g_3_axes_0, x = linear_1_cast_fp16)[name = tensor<string, []>("g_3_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 640]> input_3_cast_fp16 = add(x = f_3_cast_fp16, y = g_3_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 640]> var_28_cast_fp16 = relu(x = input_3_cast_fp16)[name = tensor<string, []>("op_28_cast_fp16")];
+            tensor<fp16, [8198, 640]> joint_joint_net_2_weight_to_fp16 = const()[name = tensor<string, []>("joint_joint_net_2_weight_to_fp16"), val = tensor<fp16, [8198, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2132800)))];
+            tensor<fp16, [8198]> joint_joint_net_2_bias_to_fp16 = const()[name = tensor<string, []>("joint_joint_net_2_bias_to_fp16"), val = tensor<fp16, [8198]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12626304)))];
+            tensor<fp16, [1, 1, 1, 8198]> linear_2_cast_fp16 = linear(bias = joint_joint_net_2_bias_to_fp16, weight = joint_joint_net_2_weight_to_fp16, x = var_28_cast_fp16)[name = tensor<string, []>("linear_2_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 8198]> combined_1_softmax_cast_fp16 = softmax(axis = var_6, x = linear_2_cast_fp16)[name = tensor<string, []>("combined_1_softmax_cast_fp16")];
+            tensor<fp32, []> combined_1_epsilon_0 = const()[name = tensor<string, []>("combined_1_epsilon_0"), val = tensor<fp32, []>(0x1p-149)];
+            tensor<fp16, [1, 1, 1, 8198]> combined_1_cast_fp16 = log(epsilon = combined_1_epsilon_0, x = combined_1_softmax_cast_fp16)[name = tensor<string, []>("combined_1_cast_fp16")];
+            tensor<int32, [1]> combined0_1_axes_0 = const()[name = tensor<string, []>("combined0_1_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 8198]> combined0_1_cast_fp16 = squeeze(axes = combined0_1_axes_0, x = combined_1_cast_fp16)[name = tensor<string, []>("combined0_1_cast_fp16")];
+            tensor<int32, [3]> var_35_begin_0 = const()[name = tensor<string, []>("op_35_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> var_35_end_0 = const()[name = tensor<string, []>("op_35_end_0"), val = tensor<int32, [3]>([1, 1, 8193])];
+            tensor<bool, [3]> var_35_end_mask_0 = const()[name = tensor<string, []>("op_35_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
+            tensor<fp16, [1, 1, 8193]> token_logits = slice_by_index(begin = var_35_begin_0, end = var_35_end_0, end_mask = var_35_end_mask_0, x = combined0_1_cast_fp16)[name = tensor<string, []>("op_35_cast_fp16")];
+            tensor<int32, [3]> var_36_begin_0 = const()[name = tensor<string, []>("op_36_begin_0"), val = tensor<int32, [3]>([0, 0, 8193])];
+            tensor<int32, [3]> var_36_end_0 = const()[name = tensor<string, []>("op_36_end_0"), val = tensor<int32, [3]>([1, 1, 8198])];
+            tensor<bool, [3]> var_36_end_mask_0 = const()[name = tensor<string, []>("op_36_end_mask_0"), val = tensor<bool, [3]>([true, true, true])];
+            tensor<fp16, [1, 1, 5]> duration_logits = slice_by_index(begin = var_36_begin_0, end = var_36_end_0, end_mask = var_36_end_mask_0, x = combined0_1_cast_fp16)[name = tensor<string, []>("op_36_cast_fp16")];
+        } -> (token_logits, duration_logits);
+}

joint.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4e0e63d840032f7f07ddb1d64446051166281e5491bf22da8a945c41f6eedb3e
+size 12642764

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff