alexwengg commited on
Commit
b016d0d
·
verified ·
1 Parent(s): e14200d

Upload 205 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. manifest.json +1355 -360
  2. multilingual/1120ms/decoder.mlmodelc/analytics/coremldata.bin +3 -0
  3. multilingual/1120ms/decoder.mlmodelc/coremldata.bin +3 -0
  4. multilingual/1120ms/decoder.mlmodelc/model.mil +64 -0
  5. multilingual/1120ms/decoder.mlmodelc/weights/weight.bin +3 -0
  6. multilingual/1120ms/decoder.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  7. multilingual/1120ms/decoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  8. multilingual/1120ms/decoder.mlpackage/Manifest.json +18 -0
  9. multilingual/1120ms/decoder_joint.mlmodelc/analytics/coremldata.bin +3 -0
  10. multilingual/1120ms/decoder_joint.mlmodelc/coremldata.bin +3 -0
  11. multilingual/1120ms/decoder_joint.mlmodelc/model.mil +83 -0
  12. multilingual/1120ms/decoder_joint.mlmodelc/weights/weight.bin +3 -0
  13. multilingual/1120ms/decoder_joint.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  14. multilingual/1120ms/decoder_joint.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  15. multilingual/1120ms/decoder_joint.mlpackage/Manifest.json +18 -0
  16. multilingual/1120ms/decoder_joint_noencproj.mlmodelc/analytics/coremldata.bin +3 -0
  17. multilingual/1120ms/decoder_joint_noencproj.mlmodelc/coremldata.bin +3 -0
  18. multilingual/1120ms/decoder_joint_noencproj.mlmodelc/model.mil +91 -0
  19. multilingual/1120ms/decoder_joint_noencproj.mlmodelc/weights/weight.bin +3 -0
  20. multilingual/1120ms/decoder_joint_noencproj.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  21. multilingual/1120ms/decoder_joint_noencproj.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  22. multilingual/1120ms/decoder_joint_noencproj.mlpackage/Manifest.json +18 -0
  23. multilingual/1120ms/encoder.mlmodelc/analytics/coremldata.bin +3 -0
  24. multilingual/1120ms/encoder.mlmodelc/coremldata.bin +3 -0
  25. multilingual/1120ms/encoder.mlmodelc/model.mil +0 -0
  26. multilingual/1120ms/encoder.mlmodelc/weights/weight.bin +3 -0
  27. multilingual/1120ms/encoder.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  28. multilingual/1120ms/encoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  29. multilingual/1120ms/encoder.mlpackage/Manifest.json +18 -0
  30. multilingual/1120ms/joint.mlmodelc/analytics/coremldata.bin +3 -0
  31. multilingual/1120ms/joint.mlmodelc/coremldata.bin +3 -0
  32. multilingual/1120ms/joint.mlmodelc/model.mil +31 -0
  33. multilingual/1120ms/joint.mlmodelc/weights/weight.bin +3 -0
  34. multilingual/1120ms/joint.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  35. multilingual/1120ms/joint.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  36. multilingual/1120ms/joint.mlpackage/Manifest.json +18 -0
  37. multilingual/1120ms/joint_noencproj_batched.mlmodelc/analytics/coremldata.bin +3 -0
  38. multilingual/1120ms/joint_noencproj_batched.mlmodelc/coremldata.bin +3 -0
  39. multilingual/1120ms/joint_noencproj_batched.mlmodelc/model.mil +26 -0
  40. multilingual/1120ms/joint_noencproj_batched.mlmodelc/weights/weight.bin +3 -0
  41. multilingual/1120ms/joint_noencproj_batched.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  42. multilingual/1120ms/joint_noencproj_batched.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  43. multilingual/1120ms/joint_noencproj_batched.mlpackage/Manifest.json +18 -0
  44. multilingual/1120ms/metadata.json +196 -0
  45. multilingual/1120ms/preprocessor.mlmodelc/analytics/coremldata.bin +3 -0
  46. multilingual/1120ms/preprocessor.mlmodelc/coremldata.bin +3 -0
  47. multilingual/1120ms/preprocessor.mlmodelc/model.mil +122 -0
  48. multilingual/1120ms/preprocessor.mlmodelc/weights/weight.bin +3 -0
  49. multilingual/1120ms/preprocessor.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  50. multilingual/1120ms/preprocessor.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
manifest.json CHANGED
@@ -1,376 +1,1371 @@
1
  {
2
- "name": "nemotron-asr-streaming-multilingual-0.6b-coreml",
3
- "version": "1.0.0",
4
  "base_model": "nvidia/nemotron-asr-streaming-multilingual-0.6b",
5
- "base_model_class": "nemo.collections.asr.models.rnnt_bpe_models_prompt.EncDecRNNTBPEModelWithPrompt",
6
- "framework": "coreml",
7
- "deployment_target": {
8
- "ios": "17.0",
9
- "macos": "14.0"
10
- },
11
- "quantization": {
12
- "encoder": "int8",
13
- "preprocessor": "fp16",
14
- "decoder": "fp16",
15
- "joint": "fp16"
16
- },
17
- "model": {
18
- "sample_rate": 16000,
19
- "mel_features": 128,
20
- "vocab_size": 13087,
21
- "blank_idx": 13087,
22
- "encoder_dim": 1024,
23
- "decoder_hidden": 640,
24
- "decoder_layers": 2,
25
- "num_prompts": 128,
26
- "default_prompt_id": 101
27
- },
28
- "loader": {
29
- "library": "fluidaudio",
30
- "class": "StreamingNemotronMultilingualAsrManager",
31
- "preferred_format": "mlmodelc",
32
- "fallback_format": "mlpackage"
33
- },
34
- "variants": [
35
- {
36
- "name": "80ms",
37
- "chunk_ms": 80,
38
- "chunk_mel_frames": 8,
39
- "pre_encode_cache": 9,
40
- "total_mel_frames": 17,
41
- "att_context_size": [
42
- 56,
43
- 0
 
 
 
 
 
 
44
  ],
45
- "cache_channel_shape": [
46
- 1,
47
- 24,
48
- 56,
49
- 1024
50
- ],
51
- "cache_time_shape": [
52
- 1,
53
- 24,
54
- 1024,
55
- 8
56
- ],
57
- "components": {
58
- "preprocessor": {
59
- "mlpackage": {
60
- "path": "80ms/preprocessor.mlpackage",
61
- "size_bytes": 608879
62
- },
63
- "mlmodelc": {
64
- "path": "80ms/preprocessor.mlmodelc",
65
- "size_bytes": 617594
66
- }
67
- },
68
- "encoder": {
69
- "mlpackage": {
70
- "path": "80ms/encoder.mlpackage",
71
- "size_bytes": 593323585
72
- },
73
- "mlmodelc": {
74
- "path": "80ms/encoder.mlmodelc",
75
- "size_bytes": 593506646
76
- }
77
- },
78
- "decoder": {
79
- "mlpackage": {
80
- "path": "80ms/decoder.mlpackage",
81
- "size_bytes": 29881569
82
- },
83
- "mlmodelc": {
84
- "path": "80ms/decoder.mlmodelc",
85
- "size_bytes": 29889159
86
- }
87
- },
88
- "joint": {
89
- "mlpackage": {
90
- "path": "80ms/joint.mlpackage",
91
- "size_bytes": 18916847
92
- },
93
- "mlmodelc": {
94
- "path": "80ms/joint.mlmodelc",
95
- "size_bytes": 18923548
96
- }
97
- }
98
- },
99
- "metadata_path": "80ms/metadata.json",
100
- "tokenizer_path": "80ms/tokenizer.json",
101
- "total_size_bytes": 1285964002
102
- },
103
- {
104
- "name": "160ms",
105
- "chunk_ms": 160,
106
- "chunk_mel_frames": 16,
107
- "pre_encode_cache": 9,
108
- "total_mel_frames": 25,
109
- "att_context_size": [
110
- 56,
111
- 0
112
  ],
113
- "cache_channel_shape": [
114
- 1,
115
- 24,
116
- 56,
117
- 1024
118
- ],
119
- "cache_time_shape": [
120
- 1,
121
- 24,
122
- 1024,
123
- 8
124
- ],
125
- "components": {
126
- "preprocessor": {
127
- "mlpackage": {
128
- "path": "160ms/preprocessor.mlpackage",
129
- "size_bytes": 608879
130
- },
131
- "mlmodelc": {
132
- "path": "160ms/preprocessor.mlmodelc",
133
- "size_bytes": 611446
134
- }
135
- },
136
- "encoder": {
137
- "mlpackage": {
138
- "path": "160ms/encoder.mlpackage",
139
- "size_bytes": 593373136
140
- },
141
- "mlmodelc": {
142
- "path": "160ms/encoder.mlmodelc",
143
- "size_bytes": 593550818
144
- }
145
- },
146
- "decoder": {
147
- "mlpackage": {
148
- "path": "160ms/decoder.mlpackage",
149
- "size_bytes": 29881569
150
- },
151
- "mlmodelc": {
152
- "path": "160ms/decoder.mlmodelc",
153
- "size_bytes": 29883011
154
- }
155
- },
156
- "joint": {
157
- "mlpackage": {
158
- "path": "160ms/joint.mlpackage",
159
- "size_bytes": 18916847
160
- },
161
- "mlmodelc": {
162
- "path": "160ms/joint.mlmodelc",
163
- "size_bytes": 18917400
164
- }
165
- }
166
- },
167
- "metadata_path": "160ms/metadata.json",
168
- "tokenizer_path": "160ms/tokenizer.json",
169
- "total_size_bytes": 1286031086
170
- },
171
- {
172
- "name": "320ms",
173
- "chunk_ms": 320,
174
- "chunk_mel_frames": 32,
175
- "pre_encode_cache": 9,
176
- "total_mel_frames": 41,
177
- "att_context_size": [
178
- 56,
179
- 0
180
  ],
181
- "cache_channel_shape": [
182
- 1,
183
- 24,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  56,
185
- 1024
186
- ],
187
- "cache_time_shape": [
188
- 1,
189
- 24,
190
- 1024,
191
- 8
192
- ],
193
- "components": {
194
- "preprocessor": {
195
- "mlpackage": {
196
- "path": "320ms/preprocessor.mlpackage",
197
- "size_bytes": 608879
198
- },
199
- "mlmodelc": {
200
- "path": "320ms/preprocessor.mlmodelc",
201
- "size_bytes": 617594
202
- }
203
- },
204
- "encoder": {
205
- "mlpackage": {
206
- "path": "320ms/encoder.mlpackage",
207
- "size_bytes": 593471709
208
- },
209
- "mlmodelc": {
210
- "path": "320ms/encoder.mlmodelc",
211
- "size_bytes": 593656949
212
- }
213
- },
214
- "decoder": {
215
- "mlpackage": {
216
- "path": "320ms/decoder.mlpackage",
217
- "size_bytes": 29881569
218
- },
219
- "mlmodelc": {
220
- "path": "320ms/decoder.mlmodelc",
221
- "size_bytes": 29889159
222
- }
223
- },
224
- "joint": {
225
- "mlpackage": {
226
- "path": "320ms/joint.mlpackage",
227
- "size_bytes": 18916847
228
- },
229
- "mlmodelc": {
230
- "path": "320ms/joint.mlmodelc",
231
- "size_bytes": 18923548
232
- }
233
- }
234
- },
235
- "metadata_path": "320ms/metadata.json",
236
- "tokenizer_path": "320ms/tokenizer.json",
237
- "total_size_bytes": 1286262430
238
- },
239
- {
240
- "name": "560ms",
241
  "chunk_ms": 560,
 
242
  "chunk_mel_frames": 56,
243
- "pre_encode_cache": 9,
244
  "total_mel_frames": 65,
245
- "att_context_size": [
246
- 56,
247
- 0
248
  ],
249
- "cache_channel_shape": [
250
- 1,
251
- 24,
252
- 56,
253
- 1024
254
- ],
255
- "cache_time_shape": [
256
- 1,
257
- 24,
258
- 1024,
259
- 8
260
- ],
261
- "components": {
262
- "preprocessor": {
263
- "mlpackage": {
264
- "path": "560ms/preprocessor.mlpackage",
265
- "size_bytes": 608879
266
- },
267
- "mlmodelc": {
268
- "path": "560ms/preprocessor.mlmodelc",
269
- "size_bytes": 617594
270
- }
271
- },
272
- "encoder": {
273
- "mlpackage": {
274
- "path": "560ms/encoder.mlpackage",
275
- "size_bytes": 593619582
276
- },
277
- "mlmodelc": {
278
- "path": "560ms/encoder.mlmodelc",
279
- "size_bytes": 593807012
280
- }
281
- },
282
- "decoder": {
283
- "mlpackage": {
284
- "path": "560ms/decoder.mlpackage",
285
- "size_bytes": 29881569
286
- },
287
- "mlmodelc": {
288
- "path": "560ms/decoder.mlmodelc",
289
- "size_bytes": 29889159
290
- }
291
- },
292
- "joint": {
293
- "mlpackage": {
294
- "path": "560ms/joint.mlpackage",
295
- "size_bytes": 18916847
296
- },
297
- "mlmodelc": {
298
- "path": "560ms/joint.mlmodelc",
299
- "size_bytes": 18923548
300
- }
301
- }
302
- },
303
- "metadata_path": "560ms/metadata.json",
304
- "tokenizer_path": "560ms/tokenizer.json",
305
- "total_size_bytes": 1286560366
306
- },
307
- {
308
- "name": "1120ms",
309
  "chunk_ms": 1120,
 
310
  "chunk_mel_frames": 112,
311
- "pre_encode_cache": 9,
312
  "total_mel_frames": 121,
313
- "att_context_size": [
314
- 56,
315
- 0
316
  ],
317
- "cache_channel_shape": [
318
- 1,
319
- 24,
320
- 56,
321
- 1024
322
- ],
323
- "cache_time_shape": [
324
- 1,
325
- 24,
326
- 1024,
327
- 8
328
- ],
329
- "components": {
330
- "preprocessor": {
331
- "mlpackage": {
332
- "path": "1120ms/preprocessor.mlpackage",
333
- "size_bytes": 608879
334
- },
335
- "mlmodelc": {
336
- "path": "1120ms/preprocessor.mlmodelc",
337
- "size_bytes": 617594
338
- }
339
- },
340
- "encoder": {
341
- "mlpackage": {
342
- "path": "1120ms/encoder.mlpackage",
343
- "size_bytes": 593966620
344
- },
345
- "mlmodelc": {
346
- "path": "1120ms/encoder.mlmodelc",
347
- "size_bytes": 594160802
348
- }
349
- },
350
- "decoder": {
351
- "mlpackage": {
352
- "path": "1120ms/decoder.mlpackage",
353
- "size_bytes": 29881569
354
- },
355
- "mlmodelc": {
356
- "path": "1120ms/decoder.mlmodelc",
357
- "size_bytes": 29889159
358
- }
359
- },
360
- "joint": {
361
- "mlpackage": {
362
- "path": "1120ms/joint.mlpackage",
363
- "size_bytes": 18916847
364
- },
365
- "mlmodelc": {
366
- "path": "1120ms/joint.mlmodelc",
367
- "size_bytes": 18923548
368
- }
369
- }
370
- },
371
- "metadata_path": "1120ms/metadata.json",
372
- "tokenizer_path": "1120ms/tokenizer.json",
373
- "total_size_bytes": 1287261196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  }
375
  ]
376
- }
 
1
  {
2
+ "name": "Nemotron 3.5 ASR Streaming Multilingual 0.6B — CoreML",
 
3
  "base_model": "nvidia/nemotron-asr-streaming-multilingual-0.6b",
4
+ "architecture": "Conformer encoder + RNN-T decoder",
5
+ "runtime": "CoreML / Apple Neural Engine",
6
+ "benchmark_machine": "Apple M5 Pro / macOS 26.5",
7
+ "recipe": "LAYERPOS[42,13] mixed-precision encoder + per-language vocab pruning + B1 fusion + triple-stage pipelining + smart-spec K=4",
8
+ "no_retraining": true,
9
+ "no_calibration": true,
10
+ "tiers_ms": [
11
+ 560,
12
+ 1120,
13
+ 2240,
14
+ 4480
15
+ ],
16
+ "recommended_tier_ms": 2240,
17
+ "models": [
18
+ "de",
19
+ "en",
20
+ "es",
21
+ "fr",
22
+ "it",
23
+ "ja",
24
+ "multilingual",
25
+ "pt",
26
+ "zh"
27
+ ],
28
+ "ship_count": 36,
29
+ "notes": [
30
+ "2240ms (2s) is the RTFx-per-latency sweet spot for every model (+20-44% over 1120ms, quality-neutral).",
31
+ "Multilingual peaks at 2240ms (74.6 RTFx); its 4480ms tier craters (19.4) because the 13088-vocab joint exceeds ANE working-set.",
32
+ "560ms is the lowest-latency tier but off the trained 14-frame attention tiling: lower RTFx (~57) and a small quality cost. en measured (57.2 RTFx); other 560ms ships shipped unbenched.",
33
+ "Portuguese 2240ms WER is +3.1pp vs 1120ms (B1-fallback path, chunk-sensitive).",
34
+ "de/zh/ja keep-sets were derived from FLEURS-test transcripts: in-domain/optimistic numbers + OOV risk on out-of-domain text. Rebuild keep-set from a broader corpus for production.",
35
+ "Each <lang>/<tier>ms/ dir is a self-contained FluidAudio model bundle (point --model-dir at it)."
36
+ ],
37
+ "ships": [
38
+ {
39
+ "path": "en/560ms",
40
+ "language": "English",
41
+ "language_code": "en-US",
42
+ "chunk_ms": 560,
43
+ "latency_s": 0.56,
44
+ "chunk_mel_frames": 56,
45
+ "total_mel_frames": 65,
46
+ "att_context": [
47
+ 42,
48
+ 13
49
  ],
50
+ "vocab_size": 988,
51
+ "vocab_pruned": true,
52
+ "components": [
53
+ "decoder",
54
+ "decoder_joint",
55
+ "decoder_joint_noencproj",
56
+ "encoder",
57
+ "joint",
58
+ "joint_noencproj_batched",
59
+ "preprocessor"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  ],
61
+ "formats": [
62
+ "mlpackage",
63
+ "mlmodelc"
64
+ ],
65
+ "benchmark": {
66
+ "rtfx": 57.2,
67
+ "wer_pct": 4.1,
68
+ "cer_pct": 1.5,
69
+ "metric": "WER",
70
+ "n": 2620,
71
+ "test_set": "LibriSpeech test-clean",
72
+ "benched": true
73
+ },
74
+ "recommended": false
75
+ },
76
+ {
77
+ "path": "en/1120ms",
78
+ "language": "English",
79
+ "language_code": "en-US",
80
+ "chunk_ms": 1120,
81
+ "latency_s": 1.12,
82
+ "chunk_mel_frames": 112,
83
+ "total_mel_frames": 121,
84
+ "att_context": [
85
+ 42,
86
+ 13
87
+ ],
88
+ "vocab_size": 988,
89
+ "vocab_pruned": true,
90
+ "components": [
91
+ "decoder",
92
+ "decoder_joint",
93
+ "decoder_joint_noencproj",
94
+ "encoder",
95
+ "joint",
96
+ "joint_noencproj_batched",
97
+ "preprocessor"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  ],
99
+ "formats": [
100
+ "mlpackage",
101
+ "mlmodelc"
102
+ ],
103
+ "benchmark": {
104
+ "rtfx": 101.4,
105
+ "wer_pct": 3.81,
106
+ "cer_pct": 1.4,
107
+ "metric": "WER",
108
+ "n": 2620,
109
+ "test_set": "LibriSpeech test-clean",
110
+ "benched": true
111
+ },
112
+ "recommended": false
113
+ },
114
+ {
115
+ "path": "en/2240ms",
116
+ "language": "English",
117
+ "language_code": "en-US",
118
+ "chunk_ms": 2240,
119
+ "latency_s": 2.24,
120
+ "chunk_mel_frames": 224,
121
+ "total_mel_frames": 233,
122
+ "att_context": [
123
+ 42,
124
+ 13
125
+ ],
126
+ "vocab_size": 988,
127
+ "vocab_pruned": true,
128
+ "components": [
129
+ "decoder",
130
+ "decoder_joint",
131
+ "decoder_joint_noencproj",
132
+ "encoder",
133
+ "joint",
134
+ "joint_noencproj_batched",
135
+ "preprocessor"
136
+ ],
137
+ "formats": [
138
+ "mlpackage",
139
+ "mlmodelc"
140
+ ],
141
+ "benchmark": {
142
+ "rtfx": 134.7,
143
+ "wer_pct": 3.7,
144
+ "cer_pct": 1.4,
145
+ "metric": "WER",
146
+ "n": 2620,
147
+ "test_set": "LibriSpeech test-clean",
148
+ "benched": true
149
+ },
150
+ "recommended": true
151
+ },
152
+ {
153
+ "path": "en/4480ms",
154
+ "language": "English",
155
+ "language_code": "en-US",
156
+ "chunk_ms": 4480,
157
+ "latency_s": 4.48,
158
+ "chunk_mel_frames": 448,
159
+ "total_mel_frames": 457,
160
+ "att_context": [
161
  56,
162
+ 13
163
+ ],
164
+ "vocab_size": 989,
165
+ "vocab_pruned": true,
166
+ "components": [
167
+ "decoder",
168
+ "decoder_joint",
169
+ "decoder_joint_noencproj",
170
+ "encoder",
171
+ "joint",
172
+ "joint_noencproj_batched",
173
+ "preprocessor"
174
+ ],
175
+ "formats": [
176
+ "mlpackage",
177
+ "mlmodelc"
178
+ ],
179
+ "benchmark": {
180
+ "rtfx": 136.7,
181
+ "wer_pct": 3.7,
182
+ "cer_pct": 1.4,
183
+ "metric": "WER",
184
+ "n": 2620,
185
+ "test_set": "LibriSpeech test-clean",
186
+ "benched": true
187
+ },
188
+ "recommended": false
189
+ },
190
+ {
191
+ "path": "es/560ms",
192
+ "language": "Spanish",
193
+ "language_code": "es-ES",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  "chunk_ms": 560,
195
+ "latency_s": 0.56,
196
  "chunk_mel_frames": 56,
 
197
  "total_mel_frames": 65,
198
+ "att_context": [
199
+ 42,
200
+ 13
201
  ],
202
+ "vocab_size": 831,
203
+ "vocab_pruned": true,
204
+ "components": [
205
+ "decoder",
206
+ "decoder_joint",
207
+ "encoder",
208
+ "joint",
209
+ "preprocessor"
210
+ ],
211
+ "formats": [
212
+ "mlpackage",
213
+ "mlmodelc"
214
+ ],
215
+ "benchmark": {
216
+ "rtfx": null,
217
+ "wer_pct": null,
218
+ "cer_pct": null,
219
+ "metric": "WER",
220
+ "n": 0,
221
+ "test_set": "MLS es_419",
222
+ "benched": false
223
+ },
224
+ "recommended": false
225
+ },
226
+ {
227
+ "path": "es/1120ms",
228
+ "language": "Spanish",
229
+ "language_code": "es-ES",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  "chunk_ms": 1120,
231
+ "latency_s": 1.12,
232
  "chunk_mel_frames": 112,
 
233
  "total_mel_frames": 121,
234
+ "att_context": [
235
+ 42,
236
+ 13
237
  ],
238
+ "vocab_size": 831,
239
+ "vocab_pruned": true,
240
+ "components": [
241
+ "decoder",
242
+ "decoder_joint",
243
+ "encoder",
244
+ "joint",
245
+ "preprocessor"
246
+ ],
247
+ "formats": [
248
+ "mlpackage",
249
+ "mlmodelc"
250
+ ],
251
+ "benchmark": {
252
+ "rtfx": 106.8,
253
+ "wer_pct": 6.5,
254
+ "cer_pct": null,
255
+ "metric": "WER",
256
+ "n": 2385,
257
+ "test_set": "MLS es_419",
258
+ "benched": true
259
+ },
260
+ "recommended": false
261
+ },
262
+ {
263
+ "path": "es/2240ms",
264
+ "language": "Spanish",
265
+ "language_code": "es-ES",
266
+ "chunk_ms": 2240,
267
+ "latency_s": 2.24,
268
+ "chunk_mel_frames": 224,
269
+ "total_mel_frames": 233,
270
+ "att_context": [
271
+ 42,
272
+ 13
273
+ ],
274
+ "vocab_size": 831,
275
+ "vocab_pruned": true,
276
+ "components": [
277
+ "decoder",
278
+ "decoder_joint",
279
+ "encoder",
280
+ "joint",
281
+ "preprocessor"
282
+ ],
283
+ "formats": [
284
+ "mlpackage",
285
+ "mlmodelc"
286
+ ],
287
+ "benchmark": {
288
+ "rtfx": 153.6,
289
+ "wer_pct": 6.8,
290
+ "cer_pct": null,
291
+ "metric": "WER",
292
+ "n": 2385,
293
+ "test_set": "MLS es_419",
294
+ "benched": true
295
+ },
296
+ "recommended": true
297
+ },
298
+ {
299
+ "path": "es/4480ms",
300
+ "language": "Spanish",
301
+ "language_code": "es-ES",
302
+ "chunk_ms": 4480,
303
+ "latency_s": 4.48,
304
+ "chunk_mel_frames": 448,
305
+ "total_mel_frames": 457,
306
+ "att_context": [
307
+ 42,
308
+ 13
309
+ ],
310
+ "vocab_size": 831,
311
+ "vocab_pruned": true,
312
+ "components": [
313
+ "decoder",
314
+ "decoder_joint",
315
+ "decoder_joint_noencproj",
316
+ "encoder",
317
+ "joint",
318
+ "joint_noencproj_batched",
319
+ "preprocessor"
320
+ ],
321
+ "formats": [
322
+ "mlpackage",
323
+ "mlmodelc"
324
+ ],
325
+ "benchmark": {
326
+ "rtfx": 139.0,
327
+ "wer_pct": 6.4,
328
+ "cer_pct": null,
329
+ "metric": "WER",
330
+ "n": 2385,
331
+ "test_set": "MLS es_419",
332
+ "benched": true
333
+ },
334
+ "recommended": false
335
+ },
336
+ {
337
+ "path": "fr/560ms",
338
+ "language": "French",
339
+ "language_code": "fr-FR",
340
+ "chunk_ms": 560,
341
+ "latency_s": 0.56,
342
+ "chunk_mel_frames": 56,
343
+ "total_mel_frames": 65,
344
+ "att_context": [
345
+ 42,
346
+ 13
347
+ ],
348
+ "vocab_size": 848,
349
+ "vocab_pruned": true,
350
+ "components": [
351
+ "decoder",
352
+ "decoder_joint",
353
+ "decoder_joint_noencproj",
354
+ "encoder",
355
+ "joint",
356
+ "joint_noencproj_batched",
357
+ "preprocessor"
358
+ ],
359
+ "formats": [
360
+ "mlpackage",
361
+ "mlmodelc"
362
+ ],
363
+ "benchmark": {
364
+ "rtfx": null,
365
+ "wer_pct": null,
366
+ "cer_pct": null,
367
+ "metric": "WER",
368
+ "n": 0,
369
+ "test_set": "MLS fr_fr",
370
+ "benched": false
371
+ },
372
+ "recommended": false
373
+ },
374
+ {
375
+ "path": "fr/1120ms",
376
+ "language": "French",
377
+ "language_code": "fr-FR",
378
+ "chunk_ms": 1120,
379
+ "latency_s": 1.12,
380
+ "chunk_mel_frames": 112,
381
+ "total_mel_frames": 121,
382
+ "att_context": [
383
+ 42,
384
+ 13
385
+ ],
386
+ "vocab_size": 848,
387
+ "vocab_pruned": true,
388
+ "components": [
389
+ "decoder",
390
+ "decoder_joint",
391
+ "decoder_joint_noencproj",
392
+ "encoder",
393
+ "joint",
394
+ "joint_noencproj_batched",
395
+ "preprocessor"
396
+ ],
397
+ "formats": [
398
+ "mlpackage",
399
+ "mlmodelc"
400
+ ],
401
+ "benchmark": {
402
+ "rtfx": 109.5,
403
+ "wer_pct": 9.9,
404
+ "cer_pct": null,
405
+ "metric": "WER",
406
+ "n": 2426,
407
+ "test_set": "MLS fr_fr",
408
+ "benched": true
409
+ },
410
+ "recommended": false
411
+ },
412
+ {
413
+ "path": "fr/2240ms",
414
+ "language": "French",
415
+ "language_code": "fr-FR",
416
+ "chunk_ms": 2240,
417
+ "latency_s": 2.24,
418
+ "chunk_mel_frames": 224,
419
+ "total_mel_frames": 233,
420
+ "att_context": [
421
+ 42,
422
+ 13
423
+ ],
424
+ "vocab_size": 848,
425
+ "vocab_pruned": true,
426
+ "components": [
427
+ "decoder",
428
+ "decoder_joint",
429
+ "decoder_joint_noencproj",
430
+ "encoder",
431
+ "joint",
432
+ "joint_noencproj_batched",
433
+ "preprocessor"
434
+ ],
435
+ "formats": [
436
+ "mlpackage",
437
+ "mlmodelc"
438
+ ],
439
+ "benchmark": {
440
+ "rtfx": 134.4,
441
+ "wer_pct": 10.4,
442
+ "cer_pct": null,
443
+ "metric": "WER",
444
+ "n": 2426,
445
+ "test_set": "MLS fr_fr",
446
+ "benched": true
447
+ },
448
+ "recommended": true
449
+ },
450
+ {
451
+ "path": "fr/4480ms",
452
+ "language": "French",
453
+ "language_code": "fr-FR",
454
+ "chunk_ms": 4480,
455
+ "latency_s": 4.48,
456
+ "chunk_mel_frames": 448,
457
+ "total_mel_frames": 457,
458
+ "att_context": [
459
+ 42,
460
+ 13
461
+ ],
462
+ "vocab_size": 848,
463
+ "vocab_pruned": true,
464
+ "components": [
465
+ "decoder",
466
+ "decoder_joint",
467
+ "decoder_joint_noencproj",
468
+ "encoder",
469
+ "joint",
470
+ "joint_noencproj_batched",
471
+ "preprocessor"
472
+ ],
473
+ "formats": [
474
+ "mlpackage",
475
+ "mlmodelc"
476
+ ],
477
+ "benchmark": {
478
+ "rtfx": 130.2,
479
+ "wer_pct": 16.8,
480
+ "cer_pct": null,
481
+ "metric": "WER",
482
+ "n": 100,
483
+ "test_set": "MLS fr_fr",
484
+ "benched": true
485
+ },
486
+ "recommended": false
487
+ },
488
+ {
489
+ "path": "it/560ms",
490
+ "language": "Italian",
491
+ "language_code": "it-IT",
492
+ "chunk_ms": 560,
493
+ "latency_s": 0.56,
494
+ "chunk_mel_frames": 56,
495
+ "total_mel_frames": 65,
496
+ "att_context": [
497
+ 42,
498
+ 13
499
+ ],
500
+ "vocab_size": 805,
501
+ "vocab_pruned": true,
502
+ "components": [
503
+ "decoder",
504
+ "decoder_joint",
505
+ "decoder_joint_noencproj",
506
+ "encoder",
507
+ "joint",
508
+ "joint_noencproj_batched",
509
+ "preprocessor"
510
+ ],
511
+ "formats": [
512
+ "mlpackage",
513
+ "mlmodelc"
514
+ ],
515
+ "benchmark": {
516
+ "rtfx": null,
517
+ "wer_pct": null,
518
+ "cer_pct": null,
519
+ "metric": "WER",
520
+ "n": 0,
521
+ "test_set": "MLS it_it",
522
+ "benched": false
523
+ },
524
+ "recommended": false
525
+ },
526
+ {
527
+ "path": "it/1120ms",
528
+ "language": "Italian",
529
+ "language_code": "it-IT",
530
+ "chunk_ms": 1120,
531
+ "latency_s": 1.12,
532
+ "chunk_mel_frames": 112,
533
+ "total_mel_frames": 121,
534
+ "att_context": [
535
+ 42,
536
+ 13
537
+ ],
538
+ "vocab_size": 805,
539
+ "vocab_pruned": true,
540
+ "components": [
541
+ "decoder",
542
+ "decoder_joint",
543
+ "decoder_joint_noencproj",
544
+ "encoder",
545
+ "joint",
546
+ "joint_noencproj_batched",
547
+ "preprocessor"
548
+ ],
549
+ "formats": [
550
+ "mlpackage",
551
+ "mlmodelc"
552
+ ],
553
+ "benchmark": {
554
+ "rtfx": 109.2,
555
+ "wer_pct": 23.0,
556
+ "cer_pct": null,
557
+ "metric": "WER",
558
+ "n": 1262,
559
+ "test_set": "MLS it_it",
560
+ "benched": true
561
+ },
562
+ "recommended": false
563
+ },
564
+ {
565
+ "path": "it/2240ms",
566
+ "language": "Italian",
567
+ "language_code": "it-IT",
568
+ "chunk_ms": 2240,
569
+ "latency_s": 2.24,
570
+ "chunk_mel_frames": 224,
571
+ "total_mel_frames": 233,
572
+ "att_context": [
573
+ 42,
574
+ 13
575
+ ],
576
+ "vocab_size": 805,
577
+ "vocab_pruned": true,
578
+ "components": [
579
+ "decoder",
580
+ "decoder_joint",
581
+ "decoder_joint_noencproj",
582
+ "encoder",
583
+ "joint",
584
+ "joint_noencproj_batched",
585
+ "preprocessor"
586
+ ],
587
+ "formats": [
588
+ "mlpackage",
589
+ "mlmodelc"
590
+ ],
591
+ "benchmark": {
592
+ "rtfx": 136.8,
593
+ "wer_pct": 18.0,
594
+ "cer_pct": null,
595
+ "metric": "WER",
596
+ "n": 1262,
597
+ "test_set": "MLS it_it",
598
+ "benched": true
599
+ },
600
+ "recommended": true
601
+ },
602
+ {
603
+ "path": "it/4480ms",
604
+ "language": "Italian",
605
+ "language_code": "it-IT",
606
+ "chunk_ms": 4480,
607
+ "latency_s": 4.48,
608
+ "chunk_mel_frames": 448,
609
+ "total_mel_frames": 457,
610
+ "att_context": [
611
+ 42,
612
+ 13
613
+ ],
614
+ "vocab_size": 805,
615
+ "vocab_pruned": true,
616
+ "components": [
617
+ "decoder",
618
+ "decoder_joint",
619
+ "decoder_joint_noencproj",
620
+ "encoder",
621
+ "joint",
622
+ "joint_noencproj_batched",
623
+ "preprocessor"
624
+ ],
625
+ "formats": [
626
+ "mlpackage",
627
+ "mlmodelc"
628
+ ],
629
+ "benchmark": {
630
+ "rtfx": 134.6,
631
+ "wer_pct": 25.6,
632
+ "cer_pct": null,
633
+ "metric": "WER",
634
+ "n": 100,
635
+ "test_set": "MLS it_it",
636
+ "benched": true
637
+ },
638
+ "recommended": false
639
+ },
640
+ {
641
+ "path": "pt/560ms",
642
+ "language": "Portuguese",
643
+ "language_code": "pt-BR",
644
+ "chunk_ms": 560,
645
+ "latency_s": 0.56,
646
+ "chunk_mel_frames": 56,
647
+ "total_mel_frames": 65,
648
+ "att_context": [
649
+ 42,
650
+ 13
651
+ ],
652
+ "vocab_size": 870,
653
+ "vocab_pruned": true,
654
+ "components": [
655
+ "decoder",
656
+ "decoder_joint",
657
+ "encoder",
658
+ "joint",
659
+ "preprocessor"
660
+ ],
661
+ "formats": [
662
+ "mlpackage",
663
+ "mlmodelc"
664
+ ],
665
+ "benchmark": {
666
+ "rtfx": null,
667
+ "wer_pct": null,
668
+ "cer_pct": null,
669
+ "metric": "WER",
670
+ "n": 0,
671
+ "test_set": "MLS pt_br",
672
+ "benched": false
673
+ },
674
+ "recommended": false
675
+ },
676
+ {
677
+ "path": "pt/1120ms",
678
+ "language": "Portuguese",
679
+ "language_code": "pt-BR",
680
+ "chunk_ms": 1120,
681
+ "latency_s": 1.12,
682
+ "chunk_mel_frames": 112,
683
+ "total_mel_frames": 121,
684
+ "att_context": [
685
+ 42,
686
+ 13
687
+ ],
688
+ "vocab_size": 870,
689
+ "vocab_pruned": true,
690
+ "components": [
691
+ "decoder",
692
+ "decoder_joint",
693
+ "encoder",
694
+ "joint",
695
+ "preprocessor"
696
+ ],
697
+ "formats": [
698
+ "mlpackage",
699
+ "mlmodelc"
700
+ ],
701
+ "benchmark": {
702
+ "rtfx": 111.4,
703
+ "wer_pct": 9.8,
704
+ "cer_pct": null,
705
+ "metric": "WER",
706
+ "n": 871,
707
+ "test_set": "MLS pt_br",
708
+ "benched": true
709
+ },
710
+ "recommended": false
711
+ },
712
+ {
713
+ "path": "pt/2240ms",
714
+ "language": "Portuguese",
715
+ "language_code": "pt-BR",
716
+ "chunk_ms": 2240,
717
+ "latency_s": 2.24,
718
+ "chunk_mel_frames": 224,
719
+ "total_mel_frames": 233,
720
+ "att_context": [
721
+ 42,
722
+ 13
723
+ ],
724
+ "vocab_size": 870,
725
+ "vocab_pruned": true,
726
+ "components": [
727
+ "decoder",
728
+ "decoder_joint",
729
+ "encoder",
730
+ "joint",
731
+ "preprocessor"
732
+ ],
733
+ "formats": [
734
+ "mlpackage",
735
+ "mlmodelc"
736
+ ],
737
+ "benchmark": {
738
+ "rtfx": 155.2,
739
+ "wer_pct": 12.9,
740
+ "cer_pct": null,
741
+ "metric": "WER",
742
+ "n": 871,
743
+ "test_set": "MLS pt_br",
744
+ "benched": true
745
+ },
746
+ "recommended": true
747
+ },
748
+ {
749
+ "path": "pt/4480ms",
750
+ "language": "Portuguese",
751
+ "language_code": "pt-BR",
752
+ "chunk_ms": 4480,
753
+ "latency_s": 4.48,
754
+ "chunk_mel_frames": 448,
755
+ "total_mel_frames": 457,
756
+ "att_context": [
757
+ 42,
758
+ 13
759
+ ],
760
+ "vocab_size": 870,
761
+ "vocab_pruned": true,
762
+ "components": [
763
+ "decoder",
764
+ "decoder_joint",
765
+ "decoder_joint_noencproj",
766
+ "encoder",
767
+ "joint",
768
+ "joint_noencproj_batched",
769
+ "preprocessor"
770
+ ],
771
+ "formats": [
772
+ "mlpackage",
773
+ "mlmodelc"
774
+ ],
775
+ "benchmark": {
776
+ "rtfx": 134.9,
777
+ "wer_pct": 10.3,
778
+ "cer_pct": null,
779
+ "metric": "WER",
780
+ "n": 871,
781
+ "test_set": "MLS pt_br",
782
+ "benched": true
783
+ },
784
+ "recommended": false
785
+ },
786
+ {
787
+ "path": "de/560ms",
788
+ "language": "German",
789
+ "language_code": "de-DE",
790
+ "chunk_ms": 560,
791
+ "latency_s": 0.56,
792
+ "chunk_mel_frames": 56,
793
+ "total_mel_frames": 65,
794
+ "att_context": [
795
+ 42,
796
+ 13
797
+ ],
798
+ "vocab_size": 795,
799
+ "vocab_pruned": true,
800
+ "components": [
801
+ "decoder",
802
+ "decoder_joint",
803
+ "encoder",
804
+ "joint",
805
+ "preprocessor"
806
+ ],
807
+ "formats": [
808
+ "mlpackage",
809
+ "mlmodelc"
810
+ ],
811
+ "benchmark": {
812
+ "rtfx": null,
813
+ "wer_pct": null,
814
+ "cer_pct": null,
815
+ "metric": "WER",
816
+ "n": 0,
817
+ "test_set": "FLEURS de_de",
818
+ "benched": false
819
+ },
820
+ "recommended": false
821
+ },
822
+ {
823
+ "path": "de/1120ms",
824
+ "language": "German",
825
+ "language_code": "de-DE",
826
+ "chunk_ms": 1120,
827
+ "latency_s": 1.12,
828
+ "chunk_mel_frames": 112,
829
+ "total_mel_frames": 121,
830
+ "att_context": [
831
+ 42,
832
+ 13
833
+ ],
834
+ "vocab_size": 795,
835
+ "vocab_pruned": true,
836
+ "components": [
837
+ "decoder",
838
+ "decoder_joint",
839
+ "encoder",
840
+ "joint",
841
+ "preprocessor"
842
+ ],
843
+ "formats": [
844
+ "mlpackage",
845
+ "mlmodelc"
846
+ ],
847
+ "benchmark": {
848
+ "rtfx": 107.5,
849
+ "wer_pct": 11.2,
850
+ "cer_pct": 3.6,
851
+ "metric": "WER",
852
+ "n": 862,
853
+ "test_set": "FLEURS de_de",
854
+ "benched": true
855
+ },
856
+ "recommended": false
857
+ },
858
+ {
859
+ "path": "de/2240ms",
860
+ "language": "German",
861
+ "language_code": "de-DE",
862
+ "chunk_ms": 2240,
863
+ "latency_s": 2.24,
864
+ "chunk_mel_frames": 224,
865
+ "total_mel_frames": 233,
866
+ "att_context": [
867
+ 42,
868
+ 13
869
+ ],
870
+ "vocab_size": 795,
871
+ "vocab_pruned": true,
872
+ "components": [
873
+ "decoder",
874
+ "decoder_joint",
875
+ "encoder",
876
+ "joint",
877
+ "preprocessor"
878
+ ],
879
+ "formats": [
880
+ "mlpackage",
881
+ "mlmodelc"
882
+ ],
883
+ "benchmark": {
884
+ "rtfx": 150.1,
885
+ "wer_pct": 11.0,
886
+ "cer_pct": 3.8,
887
+ "metric": "WER",
888
+ "n": 862,
889
+ "test_set": "FLEURS de_de",
890
+ "benched": true
891
+ },
892
+ "recommended": true
893
+ },
894
+ {
895
+ "path": "de/4480ms",
896
+ "language": "German",
897
+ "language_code": "de-DE",
898
+ "chunk_ms": 4480,
899
+ "latency_s": 4.48,
900
+ "chunk_mel_frames": 448,
901
+ "total_mel_frames": 457,
902
+ "att_context": [
903
+ 42,
904
+ 13
905
+ ],
906
+ "vocab_size": 795,
907
+ "vocab_pruned": true,
908
+ "components": [
909
+ "decoder",
910
+ "decoder_joint",
911
+ "encoder",
912
+ "joint",
913
+ "preprocessor"
914
+ ],
915
+ "formats": [
916
+ "mlpackage",
917
+ "mlmodelc"
918
+ ],
919
+ "benchmark": {
920
+ "rtfx": 151.9,
921
+ "wer_pct": 11.0,
922
+ "cer_pct": 3.9,
923
+ "metric": "WER",
924
+ "n": 862,
925
+ "test_set": "FLEURS de_de",
926
+ "benched": true
927
+ },
928
+ "recommended": false
929
+ },
930
+ {
931
+ "path": "zh/560ms",
932
+ "language": "Chinese",
933
+ "language_code": "zh-CN",
934
+ "chunk_ms": 560,
935
+ "latency_s": 0.56,
936
+ "chunk_mel_frames": 56,
937
+ "total_mel_frames": 65,
938
+ "att_context": [
939
+ 42,
940
+ 13
941
+ ],
942
+ "vocab_size": 1875,
943
+ "vocab_pruned": true,
944
+ "components": [
945
+ "decoder",
946
+ "decoder_joint",
947
+ "encoder",
948
+ "joint",
949
+ "preprocessor"
950
+ ],
951
+ "formats": [
952
+ "mlpackage",
953
+ "mlmodelc"
954
+ ],
955
+ "benchmark": {
956
+ "rtfx": null,
957
+ "wer_pct": null,
958
+ "cer_pct": null,
959
+ "metric": "CER",
960
+ "n": 0,
961
+ "test_set": "FLEURS cmn_hans_cn",
962
+ "benched": false
963
+ },
964
+ "recommended": false
965
+ },
966
+ {
967
+ "path": "zh/1120ms",
968
+ "language": "Chinese",
969
+ "language_code": "zh-CN",
970
+ "chunk_ms": 1120,
971
+ "latency_s": 1.12,
972
+ "chunk_mel_frames": 112,
973
+ "total_mel_frames": 121,
974
+ "att_context": [
975
+ 42,
976
+ 13
977
+ ],
978
+ "vocab_size": 1875,
979
+ "vocab_pruned": true,
980
+ "components": [
981
+ "decoder",
982
+ "decoder_joint",
983
+ "encoder",
984
+ "joint",
985
+ "preprocessor"
986
+ ],
987
+ "formats": [
988
+ "mlpackage",
989
+ "mlmodelc"
990
+ ],
991
+ "benchmark": {
992
+ "rtfx": 106.3,
993
+ "wer_pct": null,
994
+ "cer_pct": 21.9,
995
+ "metric": "CER",
996
+ "n": 945,
997
+ "test_set": "FLEURS cmn_hans_cn",
998
+ "benched": true
999
+ },
1000
+ "recommended": false
1001
+ },
1002
+ {
1003
+ "path": "zh/2240ms",
1004
+ "language": "Chinese",
1005
+ "language_code": "zh-CN",
1006
+ "chunk_ms": 2240,
1007
+ "latency_s": 2.24,
1008
+ "chunk_mel_frames": 224,
1009
+ "total_mel_frames": 233,
1010
+ "att_context": [
1011
+ 42,
1012
+ 13
1013
+ ],
1014
+ "vocab_size": 1875,
1015
+ "vocab_pruned": true,
1016
+ "components": [
1017
+ "decoder",
1018
+ "decoder_joint",
1019
+ "encoder",
1020
+ "joint",
1021
+ "preprocessor"
1022
+ ],
1023
+ "formats": [
1024
+ "mlpackage",
1025
+ "mlmodelc"
1026
+ ],
1027
+ "benchmark": {
1028
+ "rtfx": 146.0,
1029
+ "wer_pct": null,
1030
+ "cer_pct": 21.4,
1031
+ "metric": "CER",
1032
+ "n": 945,
1033
+ "test_set": "FLEURS cmn_hans_cn",
1034
+ "benched": true
1035
+ },
1036
+ "recommended": true
1037
+ },
1038
+ {
1039
+ "path": "zh/4480ms",
1040
+ "language": "Chinese",
1041
+ "language_code": "zh-CN",
1042
+ "chunk_ms": 4480,
1043
+ "latency_s": 4.48,
1044
+ "chunk_mel_frames": 448,
1045
+ "total_mel_frames": 457,
1046
+ "att_context": [
1047
+ 42,
1048
+ 13
1049
+ ],
1050
+ "vocab_size": 1875,
1051
+ "vocab_pruned": true,
1052
+ "components": [
1053
+ "decoder",
1054
+ "decoder_joint",
1055
+ "encoder",
1056
+ "joint",
1057
+ "preprocessor"
1058
+ ],
1059
+ "formats": [
1060
+ "mlpackage",
1061
+ "mlmodelc"
1062
+ ],
1063
+ "benchmark": {
1064
+ "rtfx": 140.2,
1065
+ "wer_pct": null,
1066
+ "cer_pct": 21.4,
1067
+ "metric": "CER",
1068
+ "n": 945,
1069
+ "test_set": "FLEURS cmn_hans_cn",
1070
+ "benched": true
1071
+ },
1072
+ "recommended": false
1073
+ },
1074
+ {
1075
+ "path": "ja/560ms",
1076
+ "language": "Japanese",
1077
+ "language_code": "ja-JP",
1078
+ "chunk_ms": 560,
1079
+ "latency_s": 0.56,
1080
+ "chunk_mel_frames": 56,
1081
+ "total_mel_frames": 65,
1082
+ "att_context": [
1083
+ 42,
1084
+ 13
1085
+ ],
1086
+ "vocab_size": 1403,
1087
+ "vocab_pruned": true,
1088
+ "components": [
1089
+ "decoder",
1090
+ "decoder_joint",
1091
+ "encoder",
1092
+ "joint",
1093
+ "preprocessor"
1094
+ ],
1095
+ "formats": [
1096
+ "mlpackage",
1097
+ "mlmodelc"
1098
+ ],
1099
+ "benchmark": {
1100
+ "rtfx": null,
1101
+ "wer_pct": null,
1102
+ "cer_pct": null,
1103
+ "metric": "CER",
1104
+ "n": 0,
1105
+ "test_set": "FLEURS ja_jp",
1106
+ "benched": false
1107
+ },
1108
+ "recommended": false
1109
+ },
1110
+ {
1111
+ "path": "ja/1120ms",
1112
+ "language": "Japanese",
1113
+ "language_code": "ja-JP",
1114
+ "chunk_ms": 1120,
1115
+ "latency_s": 1.12,
1116
+ "chunk_mel_frames": 112,
1117
+ "total_mel_frames": 121,
1118
+ "att_context": [
1119
+ 42,
1120
+ 13
1121
+ ],
1122
+ "vocab_size": 1403,
1123
+ "vocab_pruned": true,
1124
+ "components": [
1125
+ "decoder",
1126
+ "decoder_joint",
1127
+ "encoder",
1128
+ "joint",
1129
+ "preprocessor"
1130
+ ],
1131
+ "formats": [
1132
+ "mlpackage",
1133
+ "mlmodelc"
1134
+ ],
1135
+ "benchmark": {
1136
+ "rtfx": 108.5,
1137
+ "wer_pct": null,
1138
+ "cer_pct": 15.6,
1139
+ "metric": "CER",
1140
+ "n": 650,
1141
+ "test_set": "FLEURS ja_jp",
1142
+ "benched": true
1143
+ },
1144
+ "recommended": false
1145
+ },
1146
+ {
1147
+ "path": "ja/2240ms",
1148
+ "language": "Japanese",
1149
+ "language_code": "ja-JP",
1150
+ "chunk_ms": 2240,
1151
+ "latency_s": 2.24,
1152
+ "chunk_mel_frames": 224,
1153
+ "total_mel_frames": 233,
1154
+ "att_context": [
1155
+ 42,
1156
+ 13
1157
+ ],
1158
+ "vocab_size": 1403,
1159
+ "vocab_pruned": true,
1160
+ "components": [
1161
+ "decoder",
1162
+ "decoder_joint",
1163
+ "encoder",
1164
+ "joint",
1165
+ "preprocessor"
1166
+ ],
1167
+ "formats": [
1168
+ "mlpackage",
1169
+ "mlmodelc"
1170
+ ],
1171
+ "benchmark": {
1172
+ "rtfx": 150.5,
1173
+ "wer_pct": null,
1174
+ "cer_pct": 15.4,
1175
+ "metric": "CER",
1176
+ "n": 650,
1177
+ "test_set": "FLEURS ja_jp",
1178
+ "benched": true
1179
+ },
1180
+ "recommended": true
1181
+ },
1182
+ {
1183
+ "path": "ja/4480ms",
1184
+ "language": "Japanese",
1185
+ "language_code": "ja-JP",
1186
+ "chunk_ms": 4480,
1187
+ "latency_s": 4.48,
1188
+ "chunk_mel_frames": 448,
1189
+ "total_mel_frames": 457,
1190
+ "att_context": [
1191
+ 42,
1192
+ 13
1193
+ ],
1194
+ "vocab_size": 1403,
1195
+ "vocab_pruned": true,
1196
+ "components": [
1197
+ "decoder",
1198
+ "decoder_joint",
1199
+ "encoder",
1200
+ "joint",
1201
+ "preprocessor"
1202
+ ],
1203
+ "formats": [
1204
+ "mlpackage",
1205
+ "mlmodelc"
1206
+ ],
1207
+ "benchmark": {
1208
+ "rtfx": 147.8,
1209
+ "wer_pct": null,
1210
+ "cer_pct": 15.3,
1211
+ "metric": "CER",
1212
+ "n": 650,
1213
+ "test_set": "FLEURS ja_jp",
1214
+ "benched": true
1215
+ },
1216
+ "recommended": false
1217
+ },
1218
+ {
1219
+ "path": "multilingual/560ms",
1220
+ "language": "Multilingual (100+ langs via prompt_id)",
1221
+ "language_code": "auto",
1222
+ "chunk_ms": 560,
1223
+ "latency_s": 0.56,
1224
+ "chunk_mel_frames": 56,
1225
+ "total_mel_frames": 65,
1226
+ "att_context": [
1227
+ 42,
1228
+ 13
1229
+ ],
1230
+ "vocab_size": 13087,
1231
+ "vocab_pruned": false,
1232
+ "components": [
1233
+ "decoder",
1234
+ "decoder_joint",
1235
+ "decoder_joint_noencproj",
1236
+ "encoder",
1237
+ "joint",
1238
+ "joint_noencproj_batched",
1239
+ "preprocessor"
1240
+ ],
1241
+ "formats": [
1242
+ "mlpackage",
1243
+ "mlmodelc"
1244
+ ],
1245
+ "benchmark": {
1246
+ "rtfx": null,
1247
+ "wer_pct": null,
1248
+ "cer_pct": null,
1249
+ "metric": "WER",
1250
+ "n": 0,
1251
+ "test_set": "LibriSpeech test-clean (en)",
1252
+ "benched": false
1253
+ },
1254
+ "recommended": false
1255
+ },
1256
+ {
1257
+ "path": "multilingual/1120ms",
1258
+ "language": "Multilingual (100+ langs via prompt_id)",
1259
+ "language_code": "auto",
1260
+ "chunk_ms": 1120,
1261
+ "latency_s": 1.12,
1262
+ "chunk_mel_frames": 112,
1263
+ "total_mel_frames": 121,
1264
+ "att_context": [
1265
+ 42,
1266
+ 13
1267
+ ],
1268
+ "vocab_size": 13087,
1269
+ "vocab_pruned": false,
1270
+ "components": [
1271
+ "decoder",
1272
+ "decoder_joint",
1273
+ "decoder_joint_noencproj",
1274
+ "encoder",
1275
+ "joint",
1276
+ "joint_noencproj_batched",
1277
+ "preprocessor"
1278
+ ],
1279
+ "formats": [
1280
+ "mlpackage",
1281
+ "mlmodelc"
1282
+ ],
1283
+ "benchmark": {
1284
+ "rtfx": 62.2,
1285
+ "wer_pct": 3.8,
1286
+ "cer_pct": 1.4,
1287
+ "metric": "WER",
1288
+ "n": 2620,
1289
+ "test_set": "LibriSpeech test-clean (en)",
1290
+ "benched": true
1291
+ },
1292
+ "recommended": false
1293
+ },
1294
+ {
1295
+ "path": "multilingual/2240ms",
1296
+ "language": "Multilingual (100+ langs via prompt_id)",
1297
+ "language_code": "auto",
1298
+ "chunk_ms": 2240,
1299
+ "latency_s": 2.24,
1300
+ "chunk_mel_frames": 224,
1301
+ "total_mel_frames": 233,
1302
+ "att_context": [
1303
+ 42,
1304
+ 13
1305
+ ],
1306
+ "vocab_size": 13087,
1307
+ "vocab_pruned": false,
1308
+ "components": [
1309
+ "decoder",
1310
+ "decoder_joint",
1311
+ "decoder_joint_noencproj",
1312
+ "encoder",
1313
+ "joint",
1314
+ "joint_noencproj_batched",
1315
+ "preprocessor"
1316
+ ],
1317
+ "formats": [
1318
+ "mlpackage",
1319
+ "mlmodelc"
1320
+ ],
1321
+ "benchmark": {
1322
+ "rtfx": 74.6,
1323
+ "wer_pct": 3.7,
1324
+ "cer_pct": 1.4,
1325
+ "metric": "WER",
1326
+ "n": 2620,
1327
+ "test_set": "LibriSpeech test-clean (en)",
1328
+ "benched": true
1329
+ },
1330
+ "recommended": true
1331
+ },
1332
+ {
1333
+ "path": "multilingual/4480ms",
1334
+ "language": "Multilingual (100+ langs via prompt_id)",
1335
+ "language_code": "auto",
1336
+ "chunk_ms": 4480,
1337
+ "latency_s": 4.48,
1338
+ "chunk_mel_frames": 448,
1339
+ "total_mel_frames": 457,
1340
+ "att_context": [
1341
+ 42,
1342
+ 13
1343
+ ],
1344
+ "vocab_size": 13087,
1345
+ "vocab_pruned": false,
1346
+ "components": [
1347
+ "decoder",
1348
+ "decoder_joint",
1349
+ "decoder_joint_noencproj",
1350
+ "encoder",
1351
+ "joint",
1352
+ "joint_noencproj_batched",
1353
+ "preprocessor"
1354
+ ],
1355
+ "formats": [
1356
+ "mlpackage",
1357
+ "mlmodelc"
1358
+ ],
1359
+ "benchmark": {
1360
+ "rtfx": 19.4,
1361
+ "wer_pct": 3.7,
1362
+ "cer_pct": 1.4,
1363
+ "metric": "WER",
1364
+ "n": 2620,
1365
+ "test_set": "LibriSpeech test-clean (en)",
1366
+ "benched": true
1367
+ },
1368
+ "recommended": false
1369
  }
1370
  ]
1371
+ }
multilingual/1120ms/decoder.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c32520b84ded2c698000854a77228adf394db522b5a3c25f7737415aae7ed0d
3
+ size 243
multilingual/1120ms/decoder.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa812bb65dd2a3bef6acf584b2abd5d0f26f4d09afaccf6c5dfd41e630d0fd1b
3
+ size 433
multilingual/1120ms/decoder.mlmodelc/model.mil ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.3)
2
+ [buildInfo = dict<string, string>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.5.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
3
+ {
4
+ func main<ios18>(tensor<fp32, [2, 1, 640]> c_in, tensor<fp32, [2, 1, 640]> h_in, tensor<int32, [1, 1]> token, tensor<int32, [1]> token_length) {
5
+ int32 y_axis_0 = const()[name = string("y_axis_0"), val = int32(0)];
6
+ int32 y_batch_dims_0 = const()[name = string("y_batch_dims_0"), val = int32(0)];
7
+ bool y_validate_indices_0 = const()[name = string("y_validate_indices_0"), val = bool(false)];
8
+ tensor<fp16, [13088, 640]> module_prediction_embed_weight_to_fp16 = const()[name = string("module_prediction_embed_weight_to_fp16"), val = tensor<fp16, [13088, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
9
+ string token_to_int16_dtype_0 = const()[name = string("token_to_int16_dtype_0"), val = string("int16")];
10
+ tensor<int16, [1, 1]> token_to_int16 = cast(dtype = token_to_int16_dtype_0, x = token)[name = string("cast_8")];
11
+ tensor<fp16, [1, 1, 640]> y_cast_fp16_cast_uint16 = gather(axis = y_axis_0, batch_dims = y_batch_dims_0, indices = token_to_int16, validate_indices = y_validate_indices_0, x = module_prediction_embed_weight_to_fp16)[name = string("y_cast_fp16_cast_uint16")];
12
+ tensor<int32, [3]> input_3_perm_0 = const()[name = string("input_3_perm_0"), val = tensor<int32, [3]>([1, 0, 2])];
13
+ int32 split_0_num_splits_0 = const()[name = string("split_0_num_splits_0"), val = int32(2)];
14
+ int32 split_0_axis_0 = const()[name = string("split_0_axis_0"), val = int32(0)];
15
+ string h_in_to_fp16_dtype_0 = const()[name = string("h_in_to_fp16_dtype_0"), val = string("fp16")];
16
+ tensor<fp16, [2, 1, 640]> h_in_to_fp16 = cast(dtype = h_in_to_fp16_dtype_0, x = h_in)[name = string("cast_7")];
17
+ tensor<fp16, [1, 1, 640]> split_0_cast_fp16_0, tensor<fp16, [1, 1, 640]> split_0_cast_fp16_1 = split(axis = split_0_axis_0, num_splits = split_0_num_splits_0, x = h_in_to_fp16)[name = string("split_0_cast_fp16")];
18
+ int32 split_1_num_splits_0 = const()[name = string("split_1_num_splits_0"), val = int32(2)];
19
+ int32 split_1_axis_0 = const()[name = string("split_1_axis_0"), val = int32(0)];
20
+ string c_in_to_fp16_dtype_0 = const()[name = string("c_in_to_fp16_dtype_0"), val = string("fp16")];
21
+ tensor<fp16, [2, 1, 640]> c_in_to_fp16 = cast(dtype = c_in_to_fp16_dtype_0, x = c_in)[name = string("cast_6")];
22
+ tensor<fp16, [1, 1, 640]> split_1_cast_fp16_0, tensor<fp16, [1, 1, 640]> split_1_cast_fp16_1 = split(axis = split_1_axis_0, num_splits = split_1_num_splits_0, x = c_in_to_fp16)[name = string("split_1_cast_fp16")];
23
+ tensor<int32, [1]> input_lstm_layer_0_lstm_h0_squeeze_axes_0 = const()[name = string("input_lstm_layer_0_lstm_h0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
24
+ tensor<fp16, [1, 640]> input_lstm_layer_0_lstm_h0_squeeze_cast_fp16 = squeeze(axes = input_lstm_layer_0_lstm_h0_squeeze_axes_0, x = split_0_cast_fp16_0)[name = string("input_lstm_layer_0_lstm_h0_squeeze_cast_fp16")];
25
+ tensor<int32, [1]> input_lstm_layer_0_lstm_c0_squeeze_axes_0 = const()[name = string("input_lstm_layer_0_lstm_c0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
26
+ tensor<fp16, [1, 640]> input_lstm_layer_0_lstm_c0_squeeze_cast_fp16 = squeeze(axes = input_lstm_layer_0_lstm_c0_squeeze_axes_0, x = split_1_cast_fp16_0)[name = string("input_lstm_layer_0_lstm_c0_squeeze_cast_fp16")];
27
+ string input_lstm_layer_0_direction_0 = const()[name = string("input_lstm_layer_0_direction_0"), val = string("forward")];
28
+ bool input_lstm_layer_0_output_sequence_0 = const()[name = string("input_lstm_layer_0_output_sequence_0"), val = bool(true)];
29
+ string input_lstm_layer_0_recurrent_activation_0 = const()[name = string("input_lstm_layer_0_recurrent_activation_0"), val = string("sigmoid")];
30
+ string input_lstm_layer_0_cell_activation_0 = const()[name = string("input_lstm_layer_0_cell_activation_0"), val = string("tanh")];
31
+ string input_lstm_layer_0_activation_0 = const()[name = string("input_lstm_layer_0_activation_0"), val = string("tanh")];
32
+ tensor<fp16, [2560, 640]> concat_1_to_fp16 = const()[name = string("concat_1_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16752768)))];
33
+ tensor<fp16, [2560, 640]> concat_2_to_fp16 = const()[name = string("concat_2_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20029632)))];
34
+ tensor<fp16, [2560]> concat_0_to_fp16 = const()[name = string("concat_0_to_fp16"), val = tensor<fp16, [2560]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23306496)))];
35
+ tensor<fp16, [1, 1, 640]> input_3_cast_fp16 = transpose(perm = input_3_perm_0, x = y_cast_fp16_cast_uint16)[name = string("transpose_2")];
36
+ tensor<fp16, [1, 1, 640]> input_lstm_layer_0_cast_fp16_0, tensor<fp16, [1, 640]> input_lstm_layer_0_cast_fp16_1, tensor<fp16, [1, 640]> input_lstm_layer_0_cast_fp16_2 = lstm(activation = input_lstm_layer_0_activation_0, bias = concat_0_to_fp16, cell_activation = input_lstm_layer_0_cell_activation_0, direction = input_lstm_layer_0_direction_0, initial_c = input_lstm_layer_0_lstm_c0_squeeze_cast_fp16, initial_h = input_lstm_layer_0_lstm_h0_squeeze_cast_fp16, output_sequence = input_lstm_layer_0_output_sequence_0, recurrent_activation = input_lstm_layer_0_recurrent_activation_0, weight_hh = concat_2_to_fp16, weight_ih = concat_1_to_fp16, x = input_3_cast_fp16)[name = string("input_lstm_layer_0_cast_fp16")];
37
+ tensor<int32, [1]> input_lstm_h0_squeeze_axes_0 = const()[name = string("input_lstm_h0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
38
+ tensor<fp16, [1, 640]> input_lstm_h0_squeeze_cast_fp16 = squeeze(axes = input_lstm_h0_squeeze_axes_0, x = split_0_cast_fp16_1)[name = string("input_lstm_h0_squeeze_cast_fp16")];
39
+ tensor<int32, [1]> input_lstm_c0_squeeze_axes_0 = const()[name = string("input_lstm_c0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
40
+ tensor<fp16, [1, 640]> input_lstm_c0_squeeze_cast_fp16 = squeeze(axes = input_lstm_c0_squeeze_axes_0, x = split_1_cast_fp16_1)[name = string("input_lstm_c0_squeeze_cast_fp16")];
41
+ string input_direction_0 = const()[name = string("input_direction_0"), val = string("forward")];
42
+ bool input_output_sequence_0 = const()[name = string("input_output_sequence_0"), val = bool(true)];
43
+ string input_recurrent_activation_0 = const()[name = string("input_recurrent_activation_0"), val = string("sigmoid")];
44
+ string input_cell_activation_0 = const()[name = string("input_cell_activation_0"), val = string("tanh")];
45
+ string input_activation_0 = const()[name = string("input_activation_0"), val = string("tanh")];
46
+ tensor<fp16, [2560, 640]> concat_4_to_fp16 = const()[name = string("concat_4_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23311680)))];
47
+ tensor<fp16, [2560, 640]> concat_5_to_fp16 = const()[name = string("concat_5_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26588544)))];
48
+ tensor<fp16, [2560]> concat_3_to_fp16 = const()[name = string("concat_3_to_fp16"), val = tensor<fp16, [2560]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29865408)))];
49
+ tensor<fp16, [1, 1, 640]> input_cast_fp16_0, tensor<fp16, [1, 640]> input_cast_fp16_1, tensor<fp16, [1, 640]> input_cast_fp16_2 = lstm(activation = input_activation_0, bias = concat_3_to_fp16, cell_activation = input_cell_activation_0, direction = input_direction_0, initial_c = input_lstm_c0_squeeze_cast_fp16, initial_h = input_lstm_h0_squeeze_cast_fp16, output_sequence = input_output_sequence_0, recurrent_activation = input_recurrent_activation_0, weight_hh = concat_5_to_fp16, weight_ih = concat_4_to_fp16, x = input_lstm_layer_0_cast_fp16_0)[name = string("input_cast_fp16")];
50
+ int32 obj_3_axis_0 = const()[name = string("obj_3_axis_0"), val = int32(0)];
51
+ tensor<fp16, [2, 1, 640]> obj_3_cast_fp16 = stack(axis = obj_3_axis_0, values = (input_lstm_layer_0_cast_fp16_1, input_cast_fp16_1))[name = string("obj_3_cast_fp16")];
52
+ string obj_3_cast_fp16_to_fp32_dtype_0 = const()[name = string("obj_3_cast_fp16_to_fp32_dtype_0"), val = string("fp32")];
53
+ int32 obj_axis_0 = const()[name = string("obj_axis_0"), val = int32(0)];
54
+ tensor<fp16, [2, 1, 640]> obj_cast_fp16 = stack(axis = obj_axis_0, values = (input_lstm_layer_0_cast_fp16_2, input_cast_fp16_2))[name = string("obj_cast_fp16")];
55
+ string obj_cast_fp16_to_fp32_dtype_0 = const()[name = string("obj_cast_fp16_to_fp32_dtype_0"), val = string("fp32")];
56
+ tensor<int32, [3]> transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor<int32, [3]>([1, 2, 0])];
57
+ string transpose_0_cast_fp16_to_fp32_dtype_0 = const()[name = string("transpose_0_cast_fp16_to_fp32_dtype_0"), val = string("fp32")];
58
+ tensor<fp16, [1, 640, 1]> transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = input_cast_fp16_0)[name = string("transpose_1")];
59
+ tensor<fp32, [1, 640, 1]> decoder_out = cast(dtype = transpose_0_cast_fp16_to_fp32_dtype_0, x = transpose_0_cast_fp16)[name = string("cast_3")];
60
+ tensor<fp32, [2, 1, 640]> c_out = cast(dtype = obj_cast_fp16_to_fp32_dtype_0, x = obj_cast_fp16)[name = string("cast_4")];
61
+ tensor<fp32, [2, 1, 640]> h_out = cast(dtype = obj_3_cast_fp16_to_fp32_dtype_0, x = obj_3_cast_fp16)[name = string("cast_5")];
62
+ tensor<int32, [1]> token_length_tmp = identity(x = token_length)[name = string("token_length_tmp")];
63
+ } -> (decoder_out, h_out, c_out);
64
+ }
multilingual/1120ms/decoder.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e49d029739602d265719f0040fcf5328c007a1ed62d0c6ea621e0b2aaeb9a64
3
+ size 29870592
multilingual/1120ms/decoder.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c26ee345b7763ed9f217561572b1386719956de5b58e9174f5586926b4ab85c5
3
+ size 10360
multilingual/1120ms/decoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e49d029739602d265719f0040fcf5328c007a1ed62d0c6ea621e0b2aaeb9a64
3
+ size 29870592
multilingual/1120ms/decoder.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "542DC13B-08DF-47C7-AAAA-C2F9DE67BB37": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "8B23B00A-4F60-49E4-B460-719FB6B05887": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "8B23B00A-4F60-49E4-B460-719FB6B05887"
18
+ }
multilingual/1120ms/decoder_joint.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b314763ced4d2bd27484b8ec2a9c60939b724f8ab60b32d29ad0c03f6192599
3
+ size 243
multilingual/1120ms/decoder_joint.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:729baa5678fde0b9fa3e46044cb8eafcb96249bff4c306740e1c40ce326b7101
3
+ size 454
multilingual/1120ms/decoder_joint.mlmodelc/model.mil ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.3)
2
+ [buildInfo = dict<string, string>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.5.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
3
+ {
4
+ func main<ios18>(tensor<fp32, [2, 1, 640]> c_in, tensor<fp32, [1, 1024, 1]> encoder, tensor<fp32, [2, 1, 640]> h_in, tensor<int32, [1, 1]> token, tensor<int32, [1]> token_length) {
5
+ int32 y_axis_0 = const()[name = string("y_axis_0"), val = int32(0)];
6
+ int32 y_batch_dims_0 = const()[name = string("y_batch_dims_0"), val = int32(0)];
7
+ bool y_validate_indices_0 = const()[name = string("y_validate_indices_0"), val = bool(false)];
8
+ tensor<fp16, [13088, 640]> decoder_module_prediction_embed_weight_to_fp16 = const()[name = string("decoder_module_prediction_embed_weight_to_fp16"), val = tensor<fp16, [13088, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
9
+ string token_to_int16_dtype_0 = const()[name = string("token_to_int16_dtype_0"), val = string("int16")];
10
+ tensor<int16, [1, 1]> token_to_int16 = cast(dtype = token_to_int16_dtype_0, x = token)[name = string("cast_9")];
11
+ tensor<fp16, [1, 1, 640]> y_cast_fp16_cast_uint16 = gather(axis = y_axis_0, batch_dims = y_batch_dims_0, indices = token_to_int16, validate_indices = y_validate_indices_0, x = decoder_module_prediction_embed_weight_to_fp16)[name = string("y_cast_fp16_cast_uint16")];
12
+ tensor<int32, [3]> input_3_perm_0 = const()[name = string("input_3_perm_0"), val = tensor<int32, [3]>([1, 0, 2])];
13
+ int32 split_0_num_splits_0 = const()[name = string("split_0_num_splits_0"), val = int32(2)];
14
+ int32 split_0_axis_0 = const()[name = string("split_0_axis_0"), val = int32(0)];
15
+ string h_in_to_fp16_dtype_0 = const()[name = string("h_in_to_fp16_dtype_0"), val = string("fp16")];
16
+ tensor<fp16, [2, 1, 640]> h_in_to_fp16 = cast(dtype = h_in_to_fp16_dtype_0, x = h_in)[name = string("cast_8")];
17
+ tensor<fp16, [1, 1, 640]> split_0_cast_fp16_0, tensor<fp16, [1, 1, 640]> split_0_cast_fp16_1 = split(axis = split_0_axis_0, num_splits = split_0_num_splits_0, x = h_in_to_fp16)[name = string("split_0_cast_fp16")];
18
+ int32 split_1_num_splits_0 = const()[name = string("split_1_num_splits_0"), val = int32(2)];
19
+ int32 split_1_axis_0 = const()[name = string("split_1_axis_0"), val = int32(0)];
20
+ string c_in_to_fp16_dtype_0 = const()[name = string("c_in_to_fp16_dtype_0"), val = string("fp16")];
21
+ tensor<fp16, [2, 1, 640]> c_in_to_fp16 = cast(dtype = c_in_to_fp16_dtype_0, x = c_in)[name = string("cast_7")];
22
+ tensor<fp16, [1, 1, 640]> split_1_cast_fp16_0, tensor<fp16, [1, 1, 640]> split_1_cast_fp16_1 = split(axis = split_1_axis_0, num_splits = split_1_num_splits_0, x = c_in_to_fp16)[name = string("split_1_cast_fp16")];
23
+ tensor<int32, [1]> input_5_lstm_layer_0_lstm_h0_squeeze_axes_0 = const()[name = string("input_5_lstm_layer_0_lstm_h0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
24
+ tensor<fp16, [1, 640]> input_5_lstm_layer_0_lstm_h0_squeeze_cast_fp16 = squeeze(axes = input_5_lstm_layer_0_lstm_h0_squeeze_axes_0, x = split_0_cast_fp16_0)[name = string("input_5_lstm_layer_0_lstm_h0_squeeze_cast_fp16")];
25
+ tensor<int32, [1]> input_5_lstm_layer_0_lstm_c0_squeeze_axes_0 = const()[name = string("input_5_lstm_layer_0_lstm_c0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
26
+ tensor<fp16, [1, 640]> input_5_lstm_layer_0_lstm_c0_squeeze_cast_fp16 = squeeze(axes = input_5_lstm_layer_0_lstm_c0_squeeze_axes_0, x = split_1_cast_fp16_0)[name = string("input_5_lstm_layer_0_lstm_c0_squeeze_cast_fp16")];
27
+ string input_5_lstm_layer_0_direction_0 = const()[name = string("input_5_lstm_layer_0_direction_0"), val = string("forward")];
28
+ bool input_5_lstm_layer_0_output_sequence_0 = const()[name = string("input_5_lstm_layer_0_output_sequence_0"), val = bool(true)];
29
+ string input_5_lstm_layer_0_recurrent_activation_0 = const()[name = string("input_5_lstm_layer_0_recurrent_activation_0"), val = string("sigmoid")];
30
+ string input_5_lstm_layer_0_cell_activation_0 = const()[name = string("input_5_lstm_layer_0_cell_activation_0"), val = string("tanh")];
31
+ string input_5_lstm_layer_0_activation_0 = const()[name = string("input_5_lstm_layer_0_activation_0"), val = string("tanh")];
32
+ tensor<fp16, [2560, 640]> concat_1_to_fp16 = const()[name = string("concat_1_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16752768)))];
33
+ tensor<fp16, [2560, 640]> concat_2_to_fp16 = const()[name = string("concat_2_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20029632)))];
34
+ tensor<fp16, [2560]> concat_0_to_fp16 = const()[name = string("concat_0_to_fp16"), val = tensor<fp16, [2560]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23306496)))];
35
+ tensor<fp16, [1, 1, 640]> input_3_cast_fp16 = transpose(perm = input_3_perm_0, x = y_cast_fp16_cast_uint16)[name = string("transpose_4")];
36
+ tensor<fp16, [1, 1, 640]> input_5_lstm_layer_0_cast_fp16_0, tensor<fp16, [1, 640]> input_5_lstm_layer_0_cast_fp16_1, tensor<fp16, [1, 640]> input_5_lstm_layer_0_cast_fp16_2 = lstm(activation = input_5_lstm_layer_0_activation_0, bias = concat_0_to_fp16, cell_activation = input_5_lstm_layer_0_cell_activation_0, direction = input_5_lstm_layer_0_direction_0, initial_c = input_5_lstm_layer_0_lstm_c0_squeeze_cast_fp16, initial_h = input_5_lstm_layer_0_lstm_h0_squeeze_cast_fp16, output_sequence = input_5_lstm_layer_0_output_sequence_0, recurrent_activation = input_5_lstm_layer_0_recurrent_activation_0, weight_hh = concat_2_to_fp16, weight_ih = concat_1_to_fp16, x = input_3_cast_fp16)[name = string("input_5_lstm_layer_0_cast_fp16")];
37
+ tensor<int32, [1]> input_5_lstm_h0_squeeze_axes_0 = const()[name = string("input_5_lstm_h0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
38
+ tensor<fp16, [1, 640]> input_5_lstm_h0_squeeze_cast_fp16 = squeeze(axes = input_5_lstm_h0_squeeze_axes_0, x = split_0_cast_fp16_1)[name = string("input_5_lstm_h0_squeeze_cast_fp16")];
39
+ tensor<int32, [1]> input_5_lstm_c0_squeeze_axes_0 = const()[name = string("input_5_lstm_c0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
40
+ tensor<fp16, [1, 640]> input_5_lstm_c0_squeeze_cast_fp16 = squeeze(axes = input_5_lstm_c0_squeeze_axes_0, x = split_1_cast_fp16_1)[name = string("input_5_lstm_c0_squeeze_cast_fp16")];
41
+ string input_5_direction_0 = const()[name = string("input_5_direction_0"), val = string("forward")];
42
+ bool input_5_output_sequence_0 = const()[name = string("input_5_output_sequence_0"), val = bool(true)];
43
+ string input_5_recurrent_activation_0 = const()[name = string("input_5_recurrent_activation_0"), val = string("sigmoid")];
44
+ string input_5_cell_activation_0 = const()[name = string("input_5_cell_activation_0"), val = string("tanh")];
45
+ string input_5_activation_0 = const()[name = string("input_5_activation_0"), val = string("tanh")];
46
+ tensor<fp16, [2560, 640]> concat_4_to_fp16 = const()[name = string("concat_4_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23311680)))];
47
+ tensor<fp16, [2560, 640]> concat_5_to_fp16 = const()[name = string("concat_5_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26588544)))];
48
+ tensor<fp16, [2560]> concat_3_to_fp16 = const()[name = string("concat_3_to_fp16"), val = tensor<fp16, [2560]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29865408)))];
49
+ tensor<fp16, [1, 1, 640]> input_5_cast_fp16_0, tensor<fp16, [1, 640]> input_5_cast_fp16_1, tensor<fp16, [1, 640]> input_5_cast_fp16_2 = lstm(activation = input_5_activation_0, bias = concat_3_to_fp16, cell_activation = input_5_cell_activation_0, direction = input_5_direction_0, initial_c = input_5_lstm_c0_squeeze_cast_fp16, initial_h = input_5_lstm_h0_squeeze_cast_fp16, output_sequence = input_5_output_sequence_0, recurrent_activation = input_5_recurrent_activation_0, weight_hh = concat_5_to_fp16, weight_ih = concat_4_to_fp16, x = input_5_lstm_layer_0_cast_fp16_0)[name = string("input_5_cast_fp16")];
50
+ int32 obj_3_axis_0 = const()[name = string("obj_3_axis_0"), val = int32(0)];
51
+ tensor<fp16, [2, 1, 640]> obj_3_cast_fp16 = stack(axis = obj_3_axis_0, values = (input_5_lstm_layer_0_cast_fp16_1, input_5_cast_fp16_1))[name = string("obj_3_cast_fp16")];
52
+ string obj_3_cast_fp16_to_fp32_dtype_0 = const()[name = string("obj_3_cast_fp16_to_fp32_dtype_0"), val = string("fp32")];
53
+ int32 obj_axis_0 = const()[name = string("obj_axis_0"), val = int32(0)];
54
+ tensor<fp16, [2, 1, 640]> obj_cast_fp16 = stack(axis = obj_axis_0, values = (input_5_lstm_layer_0_cast_fp16_2, input_5_cast_fp16_2))[name = string("obj_cast_fp16")];
55
+ string obj_cast_fp16_to_fp32_dtype_0 = const()[name = string("obj_cast_fp16_to_fp32_dtype_0"), val = string("fp32")];
56
+ tensor<int32, [3]> transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor<int32, [3]>([1, 0, 2])];
57
+ tensor<int32, [3]> input_7_perm_0 = const()[name = string("input_7_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
58
+ string encoder_to_fp16_dtype_0 = const()[name = string("encoder_to_fp16_dtype_0"), val = string("fp16")];
59
+ tensor<fp16, [640, 1024]> joint_module_enc_weight_to_fp16 = const()[name = string("joint_module_enc_weight_to_fp16"), val = tensor<fp16, [640, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29870592)))];
60
+ tensor<fp16, [640]> joint_module_enc_bias_to_fp16 = const()[name = string("joint_module_enc_bias_to_fp16"), val = tensor<fp16, [640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31181376)))];
61
+ tensor<fp16, [1, 1024, 1]> encoder_to_fp16 = cast(dtype = encoder_to_fp16_dtype_0, x = encoder)[name = string("cast_4")];
62
+ tensor<fp16, [1, 1, 1024]> input_7_cast_fp16 = transpose(perm = input_7_perm_0, x = encoder_to_fp16)[name = string("transpose_2")];
63
+ tensor<fp16, [1, 1, 640]> linear_0_cast_fp16 = linear(bias = joint_module_enc_bias_to_fp16, weight = joint_module_enc_weight_to_fp16, x = input_7_cast_fp16)[name = string("linear_0_cast_fp16")];
64
+ tensor<fp16, [640, 640]> joint_module_pred_weight_to_fp16 = const()[name = string("joint_module_pred_weight_to_fp16"), val = tensor<fp16, [640, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31182720)))];
65
+ tensor<fp16, [640]> joint_module_pred_bias_to_fp16 = const()[name = string("joint_module_pred_bias_to_fp16"), val = tensor<fp16, [640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32001984)))];
66
+ tensor<fp16, [1, 1, 640]> transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = input_5_cast_fp16_0)[name = string("transpose_3")];
67
+ tensor<fp16, [1, 1, 640]> linear_1_cast_fp16 = linear(bias = joint_module_pred_bias_to_fp16, weight = joint_module_pred_weight_to_fp16, x = transpose_1_cast_fp16)[name = string("linear_1_cast_fp16")];
68
+ tensor<int32, [1]> var_79_axes_0 = const()[name = string("op_79_axes_0"), val = tensor<int32, [1]>([2])];
69
+ tensor<fp16, [1, 1, 1, 640]> var_79_cast_fp16 = expand_dims(axes = var_79_axes_0, x = linear_0_cast_fp16)[name = string("op_79_cast_fp16")];
70
+ tensor<int32, [1]> var_80_axes_0 = const()[name = string("op_80_axes_0"), val = tensor<int32, [1]>([1])];
71
+ tensor<fp16, [1, 1, 1, 640]> var_80_cast_fp16 = expand_dims(axes = var_80_axes_0, x = linear_1_cast_fp16)[name = string("op_80_cast_fp16")];
72
+ tensor<fp16, [1, 1, 1, 640]> input_11_cast_fp16 = add(x = var_79_cast_fp16, y = var_80_cast_fp16)[name = string("input_11_cast_fp16")];
73
+ tensor<fp16, [1, 1, 1, 640]> input_13_cast_fp16 = relu(x = input_11_cast_fp16)[name = string("input_13_cast_fp16")];
74
+ tensor<fp16, [13088, 640]> joint_module_joint_net_2_weight_to_fp16 = const()[name = string("joint_module_joint_net_2_weight_to_fp16"), val = tensor<fp16, [13088, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32003328)))];
75
+ tensor<fp16, [13088]> joint_module_joint_net_2_bias_to_fp16 = const()[name = string("joint_module_joint_net_2_bias_to_fp16"), val = tensor<fp16, [13088]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48756032)))];
76
+ tensor<fp16, [1, 1, 1, 13088]> linear_2_cast_fp16 = linear(bias = joint_module_joint_net_2_bias_to_fp16, weight = joint_module_joint_net_2_weight_to_fp16, x = input_13_cast_fp16)[name = string("linear_2_cast_fp16")];
77
+ string linear_2_cast_fp16_to_fp32_dtype_0 = const()[name = string("linear_2_cast_fp16_to_fp32_dtype_0"), val = string("fp32")];
78
+ tensor<fp32, [1, 1, 1, 13088]> logits = cast(dtype = linear_2_cast_fp16_to_fp32_dtype_0, x = linear_2_cast_fp16)[name = string("cast_3")];
79
+ tensor<fp32, [2, 1, 640]> c_out = cast(dtype = obj_cast_fp16_to_fp32_dtype_0, x = obj_cast_fp16)[name = string("cast_5")];
80
+ tensor<fp32, [2, 1, 640]> h_out = cast(dtype = obj_3_cast_fp16_to_fp32_dtype_0, x = obj_3_cast_fp16)[name = string("cast_6")];
81
+ tensor<int32, [1]> token_length_tmp = identity(x = token_length)[name = string("token_length_tmp")];
82
+ } -> (logits, h_out, c_out);
83
+ }
multilingual/1120ms/decoder_joint.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f283edec035d616e9e1372419dd9bfd8a2de2c92a70b23f6a0f5ed93366ebb03
3
+ size 48782272
multilingual/1120ms/decoder_joint.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:507c3a291a78a11f62b898c64e611016f518d1af658b2aa55c054e0a1029f7ea
3
+ size 13746
multilingual/1120ms/decoder_joint.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f283edec035d616e9e1372419dd9bfd8a2de2c92a70b23f6a0f5ed93366ebb03
3
+ size 48782272
multilingual/1120ms/decoder_joint.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "627E3113-852A-47BD-981E-FAB26C6AB6D0": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "7EEB6F52-3184-47E3-98CD-28268604F7F1": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "7EEB6F52-3184-47E3-98CD-28268604F7F1"
18
+ }
multilingual/1120ms/decoder_joint_noencproj.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4e3c25c06d72ba514e93ae2ea0dd313f057622a3fb70f65de3bee4b80b3946b
3
+ size 243
multilingual/1120ms/decoder_joint_noencproj.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9ea28113904cfeca8e5684fb8b54358eb48338f26c894739e3bd076848dcadd
3
+ size 519
multilingual/1120ms/decoder_joint_noencproj.mlmodelc/model.mil ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.3)
2
+ [buildInfo = dict<string, string>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.10.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
3
+ {
4
+ func main<ios18>(tensor<fp32, [2, 1, 640]> c_in, tensor<fp32, [1, 1, 640]> encoder_proj, tensor<fp32, [2, 1, 640]> h_in, tensor<int32, [1, 1]> token, tensor<int32, [1]> token_length) {
5
+ int32 y_batch_dims_0 = const()[name = string("y_batch_dims_0"), val = int32(0)];
6
+ bool y_validate_indices_0 = const()[name = string("y_validate_indices_0"), val = bool(false)];
7
+ tensor<fp16, [13088, 640]> decoder_module_prediction_embed_weight_to_fp16 = const()[name = string("decoder_module_prediction_embed_weight_to_fp16"), val = tensor<fp16, [13088, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
8
+ string token_to_int16_dtype_0 = const()[name = string("token_to_int16_dtype_0"), val = string("int16")];
9
+ string cast_1_dtype_0 = const()[name = string("cast_1_dtype_0"), val = string("int32")];
10
+ int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
11
+ tensor<int16, [1, 1]> token_to_int16 = cast(dtype = token_to_int16_dtype_0, x = token)[name = string("cast_10")];
12
+ tensor<int32, [1, 1]> cast_1 = cast(dtype = cast_1_dtype_0, x = token_to_int16)[name = string("cast_9")];
13
+ tensor<bool, [1, 1]> greater_equal_0 = greater_equal(x = cast_1, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
14
+ int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(13088)];
15
+ tensor<int32, [1, 1]> add_2 = add(x = cast_1, y = slice_by_index_0)[name = string("add_2")];
16
+ tensor<int32, [1, 1]> select_0 = select(a = cast_1, b = add_2, cond = greater_equal_0)[name = string("select_0")];
17
+ int32 y_cast_fp16_cast_uint16_axis_0 = const()[name = string("y_cast_fp16_cast_uint16_axis_0"), val = int32(0)];
18
+ string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")];
19
+ tensor<int16, [1, 1]> select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_8")];
20
+ tensor<fp16, [1, 1, 640]> y_cast_fp16_cast_uint16_cast_uint16 = gather(axis = y_cast_fp16_cast_uint16_axis_0, batch_dims = y_batch_dims_0, indices = select_0_to_int16, validate_indices = y_validate_indices_0, x = decoder_module_prediction_embed_weight_to_fp16)[name = string("y_cast_fp16_cast_uint16_cast_uint16")];
21
+ tensor<int32, [3]> input_3_perm_0 = const()[name = string("input_3_perm_0"), val = tensor<int32, [3]>([1, 0, 2])];
22
+ int32 split_0_num_splits_0 = const()[name = string("split_0_num_splits_0"), val = int32(2)];
23
+ int32 split_0_axis_0 = const()[name = string("split_0_axis_0"), val = int32(0)];
24
+ string h_in_to_fp16_dtype_0 = const()[name = string("h_in_to_fp16_dtype_0"), val = string("fp16")];
25
+ tensor<fp16, [2, 1, 640]> h_in_to_fp16 = cast(dtype = h_in_to_fp16_dtype_0, x = h_in)[name = string("cast_7")];
26
+ tensor<fp16, [1, 1, 640]> split_0_cast_fp16_0, tensor<fp16, [1, 1, 640]> split_0_cast_fp16_1 = split(axis = split_0_axis_0, num_splits = split_0_num_splits_0, x = h_in_to_fp16)[name = string("split_0_cast_fp16")];
27
+ int32 split_1_num_splits_0 = const()[name = string("split_1_num_splits_0"), val = int32(2)];
28
+ int32 split_1_axis_0 = const()[name = string("split_1_axis_0"), val = int32(0)];
29
+ string c_in_to_fp16_dtype_0 = const()[name = string("c_in_to_fp16_dtype_0"), val = string("fp16")];
30
+ tensor<fp16, [2, 1, 640]> c_in_to_fp16 = cast(dtype = c_in_to_fp16_dtype_0, x = c_in)[name = string("cast_6")];
31
+ tensor<fp16, [1, 1, 640]> split_1_cast_fp16_0, tensor<fp16, [1, 1, 640]> split_1_cast_fp16_1 = split(axis = split_1_axis_0, num_splits = split_1_num_splits_0, x = c_in_to_fp16)[name = string("split_1_cast_fp16")];
32
+ tensor<int32, [1]> input_5_lstm_layer_0_lstm_h0_squeeze_axes_0 = const()[name = string("input_5_lstm_layer_0_lstm_h0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
33
+ tensor<fp16, [1, 640]> input_5_lstm_layer_0_lstm_h0_squeeze_cast_fp16 = squeeze(axes = input_5_lstm_layer_0_lstm_h0_squeeze_axes_0, x = split_0_cast_fp16_0)[name = string("input_5_lstm_layer_0_lstm_h0_squeeze_cast_fp16")];
34
+ tensor<int32, [1]> input_5_lstm_layer_0_lstm_c0_squeeze_axes_0 = const()[name = string("input_5_lstm_layer_0_lstm_c0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
35
+ tensor<fp16, [1, 640]> input_5_lstm_layer_0_lstm_c0_squeeze_cast_fp16 = squeeze(axes = input_5_lstm_layer_0_lstm_c0_squeeze_axes_0, x = split_1_cast_fp16_0)[name = string("input_5_lstm_layer_0_lstm_c0_squeeze_cast_fp16")];
36
+ string input_5_lstm_layer_0_direction_0 = const()[name = string("input_5_lstm_layer_0_direction_0"), val = string("forward")];
37
+ bool input_5_lstm_layer_0_output_sequence_0 = const()[name = string("input_5_lstm_layer_0_output_sequence_0"), val = bool(true)];
38
+ string input_5_lstm_layer_0_recurrent_activation_0 = const()[name = string("input_5_lstm_layer_0_recurrent_activation_0"), val = string("sigmoid")];
39
+ string input_5_lstm_layer_0_cell_activation_0 = const()[name = string("input_5_lstm_layer_0_cell_activation_0"), val = string("tanh")];
40
+ string input_5_lstm_layer_0_activation_0 = const()[name = string("input_5_lstm_layer_0_activation_0"), val = string("tanh")];
41
+ tensor<fp16, [2560, 640]> concat_1_to_fp16 = const()[name = string("concat_1_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16752768)))];
42
+ tensor<fp16, [2560, 640]> concat_2_to_fp16 = const()[name = string("concat_2_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20029632)))];
43
+ tensor<fp16, [2560]> concat_0_to_fp16 = const()[name = string("concat_0_to_fp16"), val = tensor<fp16, [2560]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23306496)))];
44
+ tensor<fp16, [1, 1, 640]> input_3_cast_fp16 = transpose(perm = input_3_perm_0, x = y_cast_fp16_cast_uint16_cast_uint16)[name = string("transpose_3")];
45
+ tensor<fp16, [1, 1, 640]> input_5_lstm_layer_0_cast_fp16_0, tensor<fp16, [1, 640]> input_5_lstm_layer_0_cast_fp16_1, tensor<fp16, [1, 640]> input_5_lstm_layer_0_cast_fp16_2 = lstm(activation = input_5_lstm_layer_0_activation_0, bias = concat_0_to_fp16, cell_activation = input_5_lstm_layer_0_cell_activation_0, direction = input_5_lstm_layer_0_direction_0, initial_c = input_5_lstm_layer_0_lstm_c0_squeeze_cast_fp16, initial_h = input_5_lstm_layer_0_lstm_h0_squeeze_cast_fp16, output_sequence = input_5_lstm_layer_0_output_sequence_0, recurrent_activation = input_5_lstm_layer_0_recurrent_activation_0, weight_hh = concat_2_to_fp16, weight_ih = concat_1_to_fp16, x = input_3_cast_fp16)[name = string("input_5_lstm_layer_0_cast_fp16")];
46
+ tensor<int32, [1]> input_5_lstm_h0_squeeze_axes_0 = const()[name = string("input_5_lstm_h0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
47
+ tensor<fp16, [1, 640]> input_5_lstm_h0_squeeze_cast_fp16 = squeeze(axes = input_5_lstm_h0_squeeze_axes_0, x = split_0_cast_fp16_1)[name = string("input_5_lstm_h0_squeeze_cast_fp16")];
48
+ tensor<int32, [1]> input_5_lstm_c0_squeeze_axes_0 = const()[name = string("input_5_lstm_c0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
49
+ tensor<fp16, [1, 640]> input_5_lstm_c0_squeeze_cast_fp16 = squeeze(axes = input_5_lstm_c0_squeeze_axes_0, x = split_1_cast_fp16_1)[name = string("input_5_lstm_c0_squeeze_cast_fp16")];
50
+ string input_5_direction_0 = const()[name = string("input_5_direction_0"), val = string("forward")];
51
+ bool input_5_output_sequence_0 = const()[name = string("input_5_output_sequence_0"), val = bool(true)];
52
+ string input_5_recurrent_activation_0 = const()[name = string("input_5_recurrent_activation_0"), val = string("sigmoid")];
53
+ string input_5_cell_activation_0 = const()[name = string("input_5_cell_activation_0"), val = string("tanh")];
54
+ string input_5_activation_0 = const()[name = string("input_5_activation_0"), val = string("tanh")];
55
+ tensor<fp16, [2560, 640]> concat_4_to_fp16 = const()[name = string("concat_4_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23311680)))];
56
+ tensor<fp16, [2560, 640]> concat_5_to_fp16 = const()[name = string("concat_5_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26588544)))];
57
+ tensor<fp16, [2560]> concat_3_to_fp16 = const()[name = string("concat_3_to_fp16"), val = tensor<fp16, [2560]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29865408)))];
58
+ tensor<fp16, [1, 1, 640]> input_5_cast_fp16_0, tensor<fp16, [1, 640]> input_5_cast_fp16_1, tensor<fp16, [1, 640]> input_5_cast_fp16_2 = lstm(activation = input_5_activation_0, bias = concat_3_to_fp16, cell_activation = input_5_cell_activation_0, direction = input_5_direction_0, initial_c = input_5_lstm_c0_squeeze_cast_fp16, initial_h = input_5_lstm_h0_squeeze_cast_fp16, output_sequence = input_5_output_sequence_0, recurrent_activation = input_5_recurrent_activation_0, weight_hh = concat_5_to_fp16, weight_ih = concat_4_to_fp16, x = input_5_lstm_layer_0_cast_fp16_0)[name = string("input_5_cast_fp16")];
59
+ int32 obj_3_axis_0 = const()[name = string("obj_3_axis_0"), val = int32(0)];
60
+ tensor<fp16, [2, 1, 640]> obj_3_cast_fp16 = stack(axis = obj_3_axis_0, values = (input_5_lstm_layer_0_cast_fp16_1, input_5_cast_fp16_1))[name = string("obj_3_cast_fp16")];
61
+ string obj_3_cast_fp16_to_fp32_dtype_0 = const()[name = string("obj_3_cast_fp16_to_fp32_dtype_0"), val = string("fp32")];
62
+ int32 obj_axis_0 = const()[name = string("obj_axis_0"), val = int32(0)];
63
+ tensor<fp16, [2, 1, 640]> obj_cast_fp16 = stack(axis = obj_axis_0, values = (input_5_lstm_layer_0_cast_fp16_2, input_5_cast_fp16_2))[name = string("obj_cast_fp16")];
64
+ string obj_cast_fp16_to_fp32_dtype_0 = const()[name = string("obj_cast_fp16_to_fp32_dtype_0"), val = string("fp32")];
65
+ tensor<int32, [3]> transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor<int32, [3]>([1, 0, 2])];
66
+ tensor<fp16, [640, 640]> joint_module_pred_weight_to_fp16 = const()[name = string("joint_module_pred_weight_to_fp16"), val = tensor<fp16, [640, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29870592)))];
67
+ tensor<fp16, [640]> joint_module_pred_bias_to_fp16 = const()[name = string("joint_module_pred_bias_to_fp16"), val = tensor<fp16, [640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30689856)))];
68
+ tensor<fp16, [1, 1, 640]> transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = input_5_cast_fp16_0)[name = string("transpose_2")];
69
+ tensor<fp16, [1, 1, 640]> linear_0_cast_fp16 = linear(bias = joint_module_pred_bias_to_fp16, weight = joint_module_pred_weight_to_fp16, x = transpose_1_cast_fp16)[name = string("linear_0_cast_fp16")];
70
+ tensor<int32, [1]> f_axes_0 = const()[name = string("f_axes_0"), val = tensor<int32, [1]>([2])];
71
+ string encoder_proj_to_fp16_dtype_0 = const()[name = string("encoder_proj_to_fp16_dtype_0"), val = string("fp16")];
72
+ tensor<fp16, [1, 1, 640]> encoder_proj_to_fp16 = cast(dtype = encoder_proj_to_fp16_dtype_0, x = encoder_proj)[name = string("cast_3")];
73
+ tensor<fp16, [1, 1, 1, 640]> f_cast_fp16 = expand_dims(axes = f_axes_0, x = encoder_proj_to_fp16)[name = string("f_cast_fp16")];
74
+ tensor<int32, [1]> g_axes_0 = const()[name = string("g_axes_0"), val = tensor<int32, [1]>([1])];
75
+ tensor<fp16, [1, 1, 1, 640]> g_cast_fp16 = expand_dims(axes = g_axes_0, x = linear_0_cast_fp16)[name = string("g_cast_fp16")];
76
+ tensor<fp16, [1, 1, 1, 640]> input_9_cast_fp16 = add(x = f_cast_fp16, y = g_cast_fp16)[name = string("input_9_cast_fp16")];
77
+ tensor<fp16, [1, 1, 1, 640]> input_11_cast_fp16 = relu(x = input_9_cast_fp16)[name = string("input_11_cast_fp16")];
78
+ tensor<fp16, [13088, 640]> joint_module_joint_net_2_weight_to_fp16 = const()[name = string("joint_module_joint_net_2_weight_to_fp16"), val = tensor<fp16, [13088, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30691200)))];
79
+ tensor<fp16, [13088]> joint_module_joint_net_2_bias_to_fp16 = const()[name = string("joint_module_joint_net_2_bias_to_fp16"), val = tensor<fp16, [13088]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47443904)))];
80
+ tensor<fp16, [1, 1, 1, 13088]> linear_1_cast_fp16 = linear(bias = joint_module_joint_net_2_bias_to_fp16, weight = joint_module_joint_net_2_weight_to_fp16, x = input_11_cast_fp16)[name = string("linear_1_cast_fp16")];
81
+ int32 var_83 = const()[name = string("op_83"), val = int32(-1)];
82
+ tensor<fp16, [1, 1, 1, 13088]> var_85_softmax_cast_fp16 = softmax(axis = var_83, x = linear_1_cast_fp16)[name = string("op_85_softmax_cast_fp16")];
83
+ fp32 var_85_epsilon_0 = const()[name = string("op_85_epsilon_0"), val = fp32(0x1p-149)];
84
+ tensor<fp16, [1, 1, 1, 13088]> var_85_cast_fp16 = log(epsilon = var_85_epsilon_0, x = var_85_softmax_cast_fp16)[name = string("op_85_cast_fp16")];
85
+ string var_85_cast_fp16_to_fp32_dtype_0 = const()[name = string("op_85_cast_fp16_to_fp32_dtype_0"), val = string("fp32")];
86
+ tensor<fp32, [1, 1, 1, 13088]> logits = cast(dtype = var_85_cast_fp16_to_fp32_dtype_0, x = var_85_cast_fp16)[name = string("cast_2")];
87
+ tensor<fp32, [2, 1, 640]> c_out = cast(dtype = obj_cast_fp16_to_fp32_dtype_0, x = obj_cast_fp16)[name = string("cast_4")];
88
+ tensor<fp32, [2, 1, 640]> h_out = cast(dtype = obj_3_cast_fp16_to_fp32_dtype_0, x = obj_3_cast_fp16)[name = string("cast_5")];
89
+ tensor<int32, [1]> token_length_tmp = identity(x = token_length)[name = string("token_length_tmp")];
90
+ } -> (logits, h_out, c_out);
91
+ }
multilingual/1120ms/decoder_joint_noencproj.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7ddfe0cb3be2e2896258d91a95483b898e8a274c49fee256e8effd86dc64dda
3
+ size 47470144
multilingual/1120ms/decoder_joint_noencproj.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cd7f3fdcb96eec2c46b11da7cb758c2c7ec851677cc1d089bf0388138761c22
3
+ size 14631
multilingual/1120ms/decoder_joint_noencproj.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7ddfe0cb3be2e2896258d91a95483b898e8a274c49fee256e8effd86dc64dda
3
+ size 47470144
multilingual/1120ms/decoder_joint_noencproj.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "0D1E34D7-EC88-4977-9F50-17C13013772E": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "4F1B4339-60A2-4F3E-9F0C-B5B2CB77BDAE": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "4F1B4339-60A2-4F3E-9F0C-B5B2CB77BDAE"
18
+ }
multilingual/1120ms/encoder.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afb78b1b16fbeee7e0c4953a6401b77ed7b9fa6e27ba9c55ac386ca27e1033cf
3
+ size 243
multilingual/1120ms/encoder.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e316a70a3a1d1d51edcee4b8c6f386d40684c8892cf84c5d2c478c10458ce6d
3
+ size 572
multilingual/1120ms/encoder.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
multilingual/1120ms/encoder.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f23e8a17303bd5838e6ad4a402d26552f06a3097834d9fc4bf8e11973e63aa7
3
+ size 564607680
multilingual/1120ms/encoder.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66717fbcd7c596e73f7da91a275ba5d7f9f808ed141cc3f565f03f0565882926
3
+ size 800661
multilingual/1120ms/encoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f23e8a17303bd5838e6ad4a402d26552f06a3097834d9fc4bf8e11973e63aa7
3
+ size 564607680
multilingual/1120ms/encoder.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "1ED77C41-E152-4A99-A249-1381B7A2B0B2": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "3DFEC7C7-7F84-47C7-AA9B-03492E557363": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "3DFEC7C7-7F84-47C7-AA9B-03492E557363"
18
+ }
multilingual/1120ms/joint.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a285566495ca331c28bd65cb8cd6869402e41b7e1acaca31afd91458a2070130
3
+ size 243
multilingual/1120ms/joint.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38ad7913a24486cc3178df1a42d6a8233bcd54c5b42f59bc419ff9101bd19135
3
+ size 341
multilingual/1120ms/joint.mlmodelc/model.mil ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.3)
2
+ [buildInfo = dict<string, string>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.5.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
3
+ {
4
+ func main<ios18>(tensor<fp32, [1, 640, 1]> decoder, tensor<fp32, [1, 1024, 1]> encoder) {
5
+ tensor<int32, [3]> input_1_perm_0 = const()[name = string("input_1_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
6
+ string encoder_to_fp16_dtype_0 = const()[name = string("encoder_to_fp16_dtype_0"), val = string("fp16")];
7
+ tensor<int32, [3]> input_3_perm_0 = const()[name = string("input_3_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
8
+ string decoder_to_fp16_dtype_0 = const()[name = string("decoder_to_fp16_dtype_0"), val = string("fp16")];
9
+ tensor<fp16, [640, 1024]> module_enc_weight_to_fp16 = const()[name = string("module_enc_weight_to_fp16"), val = tensor<fp16, [640, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
10
+ tensor<fp16, [640]> module_enc_bias_to_fp16 = const()[name = string("module_enc_bias_to_fp16"), val = tensor<fp16, [640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1310848)))];
11
+ tensor<fp16, [1, 1024, 1]> encoder_to_fp16 = cast(dtype = encoder_to_fp16_dtype_0, x = encoder)[name = string("cast_2")];
12
+ tensor<fp16, [1, 1, 1024]> input_1_cast_fp16 = transpose(perm = input_1_perm_0, x = encoder_to_fp16)[name = string("transpose_1")];
13
+ tensor<fp16, [1, 1, 640]> linear_0_cast_fp16 = linear(bias = module_enc_bias_to_fp16, weight = module_enc_weight_to_fp16, x = input_1_cast_fp16)[name = string("linear_0_cast_fp16")];
14
+ tensor<fp16, [640, 640]> module_pred_weight_to_fp16 = const()[name = string("module_pred_weight_to_fp16"), val = tensor<fp16, [640, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1312192)))];
15
+ tensor<fp16, [640]> module_pred_bias_to_fp16 = const()[name = string("module_pred_bias_to_fp16"), val = tensor<fp16, [640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2131456)))];
16
+ tensor<fp16, [1, 640, 1]> decoder_to_fp16 = cast(dtype = decoder_to_fp16_dtype_0, x = decoder)[name = string("cast_1")];
17
+ tensor<fp16, [1, 1, 640]> input_3_cast_fp16 = transpose(perm = input_3_perm_0, x = decoder_to_fp16)[name = string("transpose_0")];
18
+ tensor<fp16, [1, 1, 640]> linear_1_cast_fp16 = linear(bias = module_pred_bias_to_fp16, weight = module_pred_weight_to_fp16, x = input_3_cast_fp16)[name = string("linear_1_cast_fp16")];
19
+ tensor<int32, [1]> var_23_axes_0 = const()[name = string("op_23_axes_0"), val = tensor<int32, [1]>([2])];
20
+ tensor<fp16, [1, 1, 1, 640]> var_23_cast_fp16 = expand_dims(axes = var_23_axes_0, x = linear_0_cast_fp16)[name = string("op_23_cast_fp16")];
21
+ tensor<int32, [1]> var_25_axes_0 = const()[name = string("op_25_axes_0"), val = tensor<int32, [1]>([1])];
22
+ tensor<fp16, [1, 1, 1, 640]> var_25_cast_fp16 = expand_dims(axes = var_25_axes_0, x = linear_1_cast_fp16)[name = string("op_25_cast_fp16")];
23
+ tensor<fp16, [1, 1, 1, 640]> input_5_cast_fp16 = add(x = var_23_cast_fp16, y = var_25_cast_fp16)[name = string("input_5_cast_fp16")];
24
+ tensor<fp16, [1, 1, 1, 640]> input_7_cast_fp16 = relu(x = input_5_cast_fp16)[name = string("input_7_cast_fp16")];
25
+ tensor<fp16, [13088, 640]> module_joint_net_2_weight_to_fp16 = const()[name = string("module_joint_net_2_weight_to_fp16"), val = tensor<fp16, [13088, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2132800)))];
26
+ tensor<fp16, [13088]> module_joint_net_2_bias_to_fp16 = const()[name = string("module_joint_net_2_bias_to_fp16"), val = tensor<fp16, [13088]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18885504)))];
27
+ tensor<fp16, [1, 1, 1, 13088]> linear_2_cast_fp16 = linear(bias = module_joint_net_2_bias_to_fp16, weight = module_joint_net_2_weight_to_fp16, x = input_7_cast_fp16)[name = string("linear_2_cast_fp16")];
28
+ string linear_2_cast_fp16_to_fp32_dtype_0 = const()[name = string("linear_2_cast_fp16_to_fp32_dtype_0"), val = string("fp32")];
29
+ tensor<fp32, [1, 1, 1, 13088]> logits = cast(dtype = linear_2_cast_fp16_to_fp32_dtype_0, x = linear_2_cast_fp16)[name = string("cast_0")];
30
+ } -> (logits);
31
+ }
multilingual/1120ms/joint.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4c8ae93e304a187ebfa0b88c812b70e79b625a549727922e7f63d61c1c7b6dd
3
+ size 18911744
multilingual/1120ms/joint.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8897a4790a6c88fde37843c50b4c45d5ea24f24ea5811fe7555b54fcdde8a5c0
3
+ size 4486
multilingual/1120ms/joint.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4c8ae93e304a187ebfa0b88c812b70e79b625a549727922e7f63d61c1c7b6dd
3
+ size 18911744
multilingual/1120ms/joint.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "3F33ED2D-8E5C-4BBD-B243-7D003571E7E2": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Specification",
7
+ "name": "model.mlmodel",
8
+ "path": "com.apple.CoreML/model.mlmodel"
9
+ },
10
+ "7D35F675-3334-491B-8264-00E768D11202": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Weights",
13
+ "name": "weights",
14
+ "path": "com.apple.CoreML/weights"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "3F33ED2D-8E5C-4BBD-B243-7D003571E7E2"
18
+ }
multilingual/1120ms/joint_noencproj_batched.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5f4a9b0771be9af64fd93db7ceb42dbd305920b1260fe2219f1b046e84841cd
3
+ size 243
multilingual/1120ms/joint_noencproj_batched.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc07c4c2de2b13127f406ee70373b2c178702a03755bdc7a2bd57e623b5e65c5
3
+ size 406
multilingual/1120ms/joint_noencproj_batched.mlmodelc/model.mil ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.3)
2
+ [buildInfo = dict<string, string>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.10.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
3
+ {
4
+ func main<ios18>(tensor<fp32, [1, 640, 1]> decoder, tensor<fp32, [1, 4, 640]> encoder_proj) {
5
+ tensor<int32, [3]> input_1_perm_0 = const()[name = string("input_1_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
6
+ string decoder_to_fp16_dtype_0 = const()[name = string("decoder_to_fp16_dtype_0"), val = string("fp16")];
7
+ tensor<fp16, [640, 640]> joint_module_pred_weight_to_fp16 = const()[name = string("joint_module_pred_weight_to_fp16"), val = tensor<fp16, [640, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
8
+ tensor<fp16, [640]> joint_module_pred_bias_to_fp16 = const()[name = string("joint_module_pred_bias_to_fp16"), val = tensor<fp16, [640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(819328)))];
9
+ tensor<fp16, [1, 640, 1]> decoder_to_fp16 = cast(dtype = decoder_to_fp16_dtype_0, x = decoder)[name = string("cast_2")];
10
+ tensor<fp16, [1, 1, 640]> input_1_cast_fp16 = transpose(perm = input_1_perm_0, x = decoder_to_fp16)[name = string("transpose_0")];
11
+ tensor<fp16, [1, 1, 640]> linear_0_cast_fp16 = linear(bias = joint_module_pred_bias_to_fp16, weight = joint_module_pred_weight_to_fp16, x = input_1_cast_fp16)[name = string("linear_0_cast_fp16")];
12
+ tensor<int32, [1]> var_15_axes_0 = const()[name = string("op_15_axes_0"), val = tensor<int32, [1]>([2])];
13
+ string encoder_proj_to_fp16_dtype_0 = const()[name = string("encoder_proj_to_fp16_dtype_0"), val = string("fp16")];
14
+ tensor<fp16, [1, 4, 640]> encoder_proj_to_fp16 = cast(dtype = encoder_proj_to_fp16_dtype_0, x = encoder_proj)[name = string("cast_1")];
15
+ tensor<fp16, [1, 4, 1, 640]> var_15_cast_fp16 = expand_dims(axes = var_15_axes_0, x = encoder_proj_to_fp16)[name = string("op_15_cast_fp16")];
16
+ tensor<int32, [1]> var_17_axes_0 = const()[name = string("op_17_axes_0"), val = tensor<int32, [1]>([1])];
17
+ tensor<fp16, [1, 1, 1, 640]> var_17_cast_fp16 = expand_dims(axes = var_17_axes_0, x = linear_0_cast_fp16)[name = string("op_17_cast_fp16")];
18
+ tensor<fp16, [1, 4, 1, 640]> input_3_cast_fp16 = add(x = var_15_cast_fp16, y = var_17_cast_fp16)[name = string("input_3_cast_fp16")];
19
+ tensor<fp16, [1, 4, 1, 640]> input_5_cast_fp16 = relu(x = input_3_cast_fp16)[name = string("input_5_cast_fp16")];
20
+ tensor<fp16, [13088, 640]> joint_module_joint_net_2_weight_to_fp16 = const()[name = string("joint_module_joint_net_2_weight_to_fp16"), val = tensor<fp16, [13088, 640]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(820672)))];
21
+ tensor<fp16, [13088]> joint_module_joint_net_2_bias_to_fp16 = const()[name = string("joint_module_joint_net_2_bias_to_fp16"), val = tensor<fp16, [13088]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17573376)))];
22
+ tensor<fp16, [1, 4, 1, 13088]> linear_1_cast_fp16 = linear(bias = joint_module_joint_net_2_bias_to_fp16, weight = joint_module_joint_net_2_weight_to_fp16, x = input_5_cast_fp16)[name = string("linear_1_cast_fp16")];
23
+ string linear_1_cast_fp16_to_fp32_dtype_0 = const()[name = string("linear_1_cast_fp16_to_fp32_dtype_0"), val = string("fp32")];
24
+ tensor<fp32, [1, 4, 1, 13088]> logits = cast(dtype = linear_1_cast_fp16_to_fp32_dtype_0, x = linear_1_cast_fp16)[name = string("cast_0")];
25
+ } -> (logits);
26
+ }
multilingual/1120ms/joint_noencproj_batched.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d6b104e9d6990c07d6cd41bafe27cae8d39cfe037ec701584c47af1094daeeb
3
+ size 17599616
multilingual/1120ms/joint_noencproj_batched.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebb2680467cb6774940b609a57ff38df74262ab683052ee3615204e4bd1e3949
3
+ size 3840
multilingual/1120ms/joint_noencproj_batched.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d6b104e9d6990c07d6cd41bafe27cae8d39cfe037ec701584c47af1094daeeb
3
+ size 17599616
multilingual/1120ms/joint_noencproj_batched.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "55B094CA-55E5-480A-8B14-30A24DC3EEF0": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "CA02FD13-87CE-4425-9B49-DE8265EC1B54": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "CA02FD13-87CE-4425-9B49-DE8265EC1B54"
18
+ }
multilingual/1120ms/metadata.json ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "nvidia/nemotron-asr-streaming-multilingual-0.6b",
3
+ "model_class": "nemo.collections.asr.models.rnnt_bpe_models_prompt.EncDecRNNTBPEModelWithPrompt",
4
+ "sample_rate": 16000,
5
+ "mel_features": 128,
6
+ "chunk_mel_frames": 112,
7
+ "pre_encode_cache": 9,
8
+ "total_mel_frames": 121,
9
+ "att_context_size": [
10
+ 42,
11
+ 13
12
+ ],
13
+ "vocab_size": 13087,
14
+ "blank_idx": 13087,
15
+ "cache_channel_shape": [
16
+ 1,
17
+ 24,
18
+ 42,
19
+ 1024
20
+ ],
21
+ "cache_time_shape": [
22
+ 1,
23
+ 24,
24
+ 1024,
25
+ 8
26
+ ],
27
+ "decoder_hidden": 640,
28
+ "decoder_layers": 2,
29
+ "encoder_dim": 1024,
30
+ "num_prompts": 128,
31
+ "prompt_dictionary": {
32
+ "af-ZA": 54,
33
+ "am-ET": 49,
34
+ "ar": 7,
35
+ "ar-AR": 7,
36
+ "auto": 101,
37
+ "ay-BO": 81,
38
+ "az-AZ": 66,
39
+ "bg": 30,
40
+ "bg-BG": 30,
41
+ "bn-IN": 36,
42
+ "cs": 22,
43
+ "cs-CZ": 22,
44
+ "da": 25,
45
+ "da-DK": 25,
46
+ "de": 9,
47
+ "de-DE": 9,
48
+ "el": 21,
49
+ "el-GR": 21,
50
+ "en": 0,
51
+ "en-GB": 1,
52
+ "en-US": 0,
53
+ "enGB": 1,
54
+ "es": 3,
55
+ "es-ES": 2,
56
+ "es-US": 3,
57
+ "esES": 2,
58
+ "et": 60,
59
+ "et-EE": 60,
60
+ "fa-IR": 38,
61
+ "fi": 26,
62
+ "fi-FI": 26,
63
+ "fr": 8,
64
+ "fr-CA": 100,
65
+ "fr-FR": 8,
66
+ "gn-PY": 82,
67
+ "gu-IN": 42,
68
+ "ha-NG": 50,
69
+ "haw-US": 97,
70
+ "he-IL": 64,
71
+ "hi": 6,
72
+ "hi-HI": 6,
73
+ "hi-IN": 6,
74
+ "hr": 29,
75
+ "hr-HR": 29,
76
+ "hu": 23,
77
+ "hu-HU": 23,
78
+ "hy-AM": 68,
79
+ "id-ID": 34,
80
+ "ig-NG": 53,
81
+ "it": 15,
82
+ "it-IT": 15,
83
+ "ja-JA": 10,
84
+ "ja-JP": 10,
85
+ "ka-GE": 67,
86
+ "km-KH": 47,
87
+ "kn-IN": 43,
88
+ "ko": 14,
89
+ "ko-KO": 14,
90
+ "ko-KR": 14,
91
+ "ku-TR": 65,
92
+ "ky-KG": 71,
93
+ "ln-CD": 58,
94
+ "lt": 31,
95
+ "lt-LT": 31,
96
+ "lv": 61,
97
+ "lv-LV": 61,
98
+ "mi-NZ": 96,
99
+ "ml-IN": 44,
100
+ "mr-IN": 41,
101
+ "ms-MY": 35,
102
+ "mt-MT": 102,
103
+ "nah-MX": 83,
104
+ "nb": 103,
105
+ "nb-NO": 103,
106
+ "ne-NP": 46,
107
+ "nl": 16,
108
+ "nl-NL": 16,
109
+ "nn": 104,
110
+ "nn-NO": 104,
111
+ "no": 27,
112
+ "no-NO": 27,
113
+ "ny-MW": 57,
114
+ "or-KE": 59,
115
+ "pl": 17,
116
+ "pl-PL": 17,
117
+ "pt": 13,
118
+ "pt-BR": 12,
119
+ "pt-PT": 13,
120
+ "qu-PE": 80,
121
+ "ro": 20,
122
+ "ro-RO": 20,
123
+ "ru": 11,
124
+ "ru-RU": 11,
125
+ "rw-RW": 55,
126
+ "si-LK": 45,
127
+ "sk": 28,
128
+ "sk-SK": 28,
129
+ "sl": 62,
130
+ "sl-SI": 62,
131
+ "sm-WS": 98,
132
+ "so-SO": 56,
133
+ "sv": 24,
134
+ "sv-SE": 24,
135
+ "sw-KE": 48,
136
+ "ta-IN": 39,
137
+ "te-IN": 40,
138
+ "tg-TJ": 70,
139
+ "th-TH": 32,
140
+ "to-TO": 99,
141
+ "tr": 18,
142
+ "tr-TR": 18,
143
+ "uk": 19,
144
+ "uk-UA": 19,
145
+ "ur-PK": 37,
146
+ "uz-UZ": 69,
147
+ "vi-VN": 33,
148
+ "yo-NG": 52,
149
+ "zh-CN": 4,
150
+ "zh-TW": 5,
151
+ "zh-ZH": 4,
152
+ "zu-ZA": 51
153
+ },
154
+ "default_prompt_id": 101,
155
+ "lang_tag_token_ids": [
156
+ 1,
157
+ 256,
158
+ 397,
159
+ 518,
160
+ 673,
161
+ 814,
162
+ 907,
163
+ 993,
164
+ 1125,
165
+ 1232,
166
+ 1279,
167
+ 1383,
168
+ 1455,
169
+ 1603,
170
+ 1724,
171
+ 1841,
172
+ 1929,
173
+ 2021,
174
+ 2124,
175
+ 2205,
176
+ 2322,
177
+ 2440,
178
+ 2529,
179
+ 2809,
180
+ 2947,
181
+ 2986,
182
+ 3051,
183
+ 3064,
184
+ 3134,
185
+ 3247,
186
+ 3446,
187
+ 7489,
188
+ 9532,
189
+ 9544,
190
+ 9596,
191
+ 9695,
192
+ 9815,
193
+ 9847,
194
+ 12944
195
+ ]
196
+ }
multilingual/1120ms/preprocessor.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63c74dcaace5d0cef0b6bcd65225e8985e605517fa97f95b13218d02735b6a42
3
+ size 243
multilingual/1120ms/preprocessor.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7b11e08aba46d1845d8ad3f247717e0f6fae35b21d71d52e44a69ea73587bfe
3
+ size 371
multilingual/1120ms/preprocessor.mlmodelc/model.mil ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.3)
2
+ [buildInfo = dict<string, string>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.5.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
3
+ {
4
+ func main<ios18>(tensor<fp32, [1, ?]> audio, tensor<int32, [1]> audio_length) [FlexibleShapeInformation = tuple<tuple<string, dict<string, tensor<int32, [?]>>>, tuple<string, dict<string, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"audio", [1, 1]}}), ("RangeDims", {{"audio", [[1, 1], [1, 480000]]}})))] {
5
+ int32 var_9 = const()[name = string("op_9"), val = int32(1)];
6
+ int32 var_10 = const()[name = string("op_10"), val = int32(160)];
7
+ int32 var_12 = const()[name = string("op_12"), val = int32(0)];
8
+ int32 var_33 = const()[name = string("op_33"), val = int32(512)];
9
+ tensor<int32, [1]> var_34 = add(x = audio_length, y = var_33)[name = string("op_34")];
10
+ int32 var_35 = const()[name = string("op_35"), val = int32(512)];
11
+ tensor<int32, [1]> var_36 = sub(x = var_34, y = var_35)[name = string("op_36")];
12
+ tensor<int32, [1]> floor_div_0 = floor_div(x = var_36, y = var_10)[name = string("floor_div_0")];
13
+ tensor<bool, [1]> var_39 = equal(x = audio_length, y = var_12)[name = string("op_39")];
14
+ tensor<int32, [1]> var_40 = const()[name = string("op_40"), val = tensor<int32, [1]>([0])];
15
+ tensor<int32, [1]> mel_length = select(a = var_40, b = floor_div_0, cond = var_39)[name = string("seq_len")];
16
+ string audio_to_fp16_dtype_0 = const()[name = string("audio_to_fp16_dtype_0"), val = string("fp16")];
17
+ tensor<fp16, [1, ?]> audio_to_fp16 = cast(dtype = audio_to_fp16_dtype_0, x = audio)[name = string("cast_14")];
18
+ tensor<int32, [2]> var_42_shape_cast_fp16 = shape(x = audio_to_fp16)[name = string("op_42_shape_cast_fp16")];
19
+ int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)];
20
+ int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)];
21
+ bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)];
22
+ string var_42_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_42_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")];
23
+ uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)];
24
+ tensor<int16, [2]> var_42_shape_cast_fp16_to_int16 = cast(dtype = var_42_shape_cast_fp16_to_int16_dtype_0, x = var_42_shape_cast_fp16)[name = string("cast_13")];
25
+ int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_42_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")];
26
+ string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")];
27
+ int32 const_0 = const()[name = string("const_0"), val = int32(0)];
28
+ int32 const_1 = const()[name = string("const_1"), val = int32(1)];
29
+ int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_12")];
30
+ tensor<int32, [?]> var_43 = range_1d(end = gather_0_cast_uint16_to_int32, start = const_0, step = const_1)[name = string("op_43")];
31
+ tensor<int32, [1]> var_44_axes_0 = const()[name = string("op_44_axes_0"), val = tensor<int32, [1]>([0])];
32
+ tensor<int32, [1, ?]> var_44 = expand_dims(axes = var_44_axes_0, x = var_43)[name = string("op_44")];
33
+ tensor<int32, [1]> var_45_axes_0 = const()[name = string("op_45_axes_0"), val = tensor<int32, [1]>([1])];
34
+ tensor<int32, [1, 1]> var_45 = expand_dims(axes = var_45_axes_0, x = audio_length)[name = string("op_45")];
35
+ tensor<bool, [1, ?]> timemask = less(x = var_44, y = var_45)[name = string("timemask")];
36
+ tensor<int32, [2]> var_48_begin_0 = const()[name = string("op_48_begin_0"), val = tensor<int32, [2]>([0, 0])];
37
+ tensor<int32, [2]> var_48_end_0 = const()[name = string("op_48_end_0"), val = tensor<int32, [2]>([1, 1])];
38
+ tensor<bool, [2]> var_48_end_mask_0 = const()[name = string("op_48_end_mask_0"), val = tensor<bool, [2]>([true, false])];
39
+ tensor<bool, [2]> var_48_squeeze_mask_0 = const()[name = string("op_48_squeeze_mask_0"), val = tensor<bool, [2]>([false, true])];
40
+ tensor<fp16, [1]> var_48_cast_fp16 = slice_by_index(begin = var_48_begin_0, end = var_48_end_0, end_mask = var_48_end_mask_0, squeeze_mask = var_48_squeeze_mask_0, x = audio_to_fp16)[name = string("op_48_cast_fp16")];
41
+ tensor<int32, [1]> var_49_axes_0 = const()[name = string("op_49_axes_0"), val = tensor<int32, [1]>([1])];
42
+ tensor<fp16, [1, 1]> var_49_cast_fp16 = expand_dims(axes = var_49_axes_0, x = var_48_cast_fp16)[name = string("op_49_cast_fp16")];
43
+ tensor<int32, [2]> var_51_begin_0 = const()[name = string("op_51_begin_0"), val = tensor<int32, [2]>([0, 1])];
44
+ tensor<int32, [2]> var_51_end_0 = const()[name = string("op_51_end_0"), val = tensor<int32, [2]>([1, 0])];
45
+ tensor<bool, [2]> var_51_end_mask_0 = const()[name = string("op_51_end_mask_0"), val = tensor<bool, [2]>([true, true])];
46
+ tensor<fp16, [1, ?]> var_51_cast_fp16 = slice_by_index(begin = var_51_begin_0, end = var_51_end_0, end_mask = var_51_end_mask_0, x = audio_to_fp16)[name = string("op_51_cast_fp16")];
47
+ tensor<int32, [2]> var_53_begin_0 = const()[name = string("op_53_begin_0"), val = tensor<int32, [2]>([0, 0])];
48
+ tensor<int32, [2]> var_53_end_0 = const()[name = string("op_53_end_0"), val = tensor<int32, [2]>([1, -1])];
49
+ tensor<bool, [2]> var_53_end_mask_0 = const()[name = string("op_53_end_mask_0"), val = tensor<bool, [2]>([true, false])];
50
+ tensor<fp16, [1, ?]> var_53_cast_fp16 = slice_by_index(begin = var_53_begin_0, end = var_53_end_0, end_mask = var_53_end_mask_0, x = audio_to_fp16)[name = string("op_53_cast_fp16")];
51
+ fp16 var_54_to_fp16 = const()[name = string("op_54_to_fp16"), val = fp16(0x1.f0cp-1)];
52
+ tensor<fp16, [1, ?]> var_55_cast_fp16 = mul(x = var_53_cast_fp16, y = var_54_to_fp16)[name = string("op_55_cast_fp16")];
53
+ tensor<fp16, [1, ?]> var_56_cast_fp16 = sub(x = var_51_cast_fp16, y = var_55_cast_fp16)[name = string("op_56_cast_fp16")];
54
+ bool x_3_interleave_0 = const()[name = string("x_3_interleave_0"), val = bool(false)];
55
+ tensor<fp16, [1, ?]> x_3_cast_fp16 = concat(axis = var_9, interleave = x_3_interleave_0, values = (var_49_cast_fp16, var_56_cast_fp16))[name = string("x_3_cast_fp16")];
56
+ tensor<bool, [1, ?]> var_59 = logical_not(x = timemask)[name = string("op_59")];
57
+ fp16 var_16_to_fp16 = const()[name = string("op_16_to_fp16"), val = fp16(0x0p+0)];
58
+ tensor<fp16, [1, ?]> input_1_cast_fp16 = select(a = var_16_to_fp16, b = x_3_cast_fp16, cond = var_59)[name = string("input_1_cast_fp16")];
59
+ tensor<int32, [3]> concat_1x = const()[name = string("concat_1x"), val = tensor<int32, [3]>([1, 1, -1])];
60
+ tensor<fp16, [1, 1, ?]> input_3_cast_fp16 = reshape(shape = concat_1x, x = input_1_cast_fp16)[name = string("input_3_cast_fp16")];
61
+ tensor<int32, [6]> input_5_pad_0 = const()[name = string("input_5_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 256, 256])];
62
+ string input_5_mode_0 = const()[name = string("input_5_mode_0"), val = string("constant")];
63
+ fp16 const_3_to_fp16 = const()[name = string("const_3_to_fp16"), val = fp16(0x0p+0)];
64
+ tensor<fp16, [1, 1, ?]> input_5_cast_fp16 = pad(constant_val = const_3_to_fp16, mode = input_5_mode_0, pad = input_5_pad_0, x = input_3_cast_fp16)[name = string("input_5_cast_fp16")];
65
+ tensor<int32, [2]> concat_2x = const()[name = string("concat_2x"), val = tensor<int32, [2]>([1, -1])];
66
+ tensor<fp16, [1, ?]> input_cast_fp16 = reshape(shape = concat_2x, x = input_5_cast_fp16)[name = string("input_cast_fp16")];
67
+ tensor<int32, [1]> expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor<int32, [1]>([160])];
68
+ tensor<int32, [1]> expand_dims_4_axes_0 = const()[name = string("expand_dims_4_axes_0"), val = tensor<int32, [1]>([1])];
69
+ tensor<fp16, [1, 1, ?]> expand_dims_4_cast_fp16 = expand_dims(axes = expand_dims_4_axes_0, x = input_cast_fp16)[name = string("expand_dims_4_cast_fp16")];
70
+ string conv_0_pad_type_0 = const()[name = string("conv_0_pad_type_0"), val = string("valid")];
71
+ tensor<int32, [2]> conv_0_pad_0 = const()[name = string("conv_0_pad_0"), val = tensor<int32, [2]>([0, 0])];
72
+ tensor<int32, [1]> conv_0_dilations_0 = const()[name = string("conv_0_dilations_0"), val = tensor<int32, [1]>([1])];
73
+ int32 conv_0_groups_0 = const()[name = string("conv_0_groups_0"), val = int32(1)];
74
+ tensor<fp16, [257, 1, 512]> expand_dims_1_to_fp16 = const()[name = string("expand_dims_1_to_fp16"), val = tensor<fp16, [257, 1, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
75
+ tensor<fp16, [1, 257, ?]> conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_3, weight = expand_dims_1_to_fp16, x = expand_dims_4_cast_fp16)[name = string("conv_0_cast_fp16")];
76
+ string conv_1_pad_type_0 = const()[name = string("conv_1_pad_type_0"), val = string("valid")];
77
+ tensor<int32, [2]> conv_1_pad_0 = const()[name = string("conv_1_pad_0"), val = tensor<int32, [2]>([0, 0])];
78
+ tensor<int32, [1]> conv_1_dilations_0 = const()[name = string("conv_1_dilations_0"), val = tensor<int32, [1]>([1])];
79
+ int32 conv_1_groups_0 = const()[name = string("conv_1_groups_0"), val = int32(1)];
80
+ tensor<fp16, [257, 1, 512]> expand_dims_2_to_fp16 = const()[name = string("expand_dims_2_to_fp16"), val = tensor<fp16, [257, 1, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263296)))];
81
+ tensor<fp16, [1, 257, ?]> conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_3, weight = expand_dims_2_to_fp16, x = expand_dims_4_cast_fp16)[name = string("conv_1_cast_fp16")];
82
+ int32 stack_0_axis_0 = const()[name = string("stack_0_axis_0"), val = int32(-1)];
83
+ tensor<fp16, [1, 257, ?, 2]> stack_0_cast_fp16 = stack(axis = stack_0_axis_0, values = (conv_0_cast_fp16, conv_1_cast_fp16))[name = string("stack_0_cast_fp16")];
84
+ fp16 var_19_promoted_to_fp16 = const()[name = string("op_19_promoted_to_fp16"), val = fp16(0x1p+1)];
85
+ tensor<fp16, [1, 257, ?, 2]> var_74_cast_fp16 = pow(x = stack_0_cast_fp16, y = var_19_promoted_to_fp16)[name = string("op_74_cast_fp16")];
86
+ tensor<int32, [1]> var_76_axes_0 = const()[name = string("op_76_axes_0"), val = tensor<int32, [1]>([-1])];
87
+ bool var_76_keep_dims_0 = const()[name = string("op_76_keep_dims_0"), val = bool(false)];
88
+ tensor<fp16, [1, 257, ?]> var_76_cast_fp16 = reduce_sum(axes = var_76_axes_0, keep_dims = var_76_keep_dims_0, x = var_74_cast_fp16)[name = string("op_76_cast_fp16")];
89
+ tensor<fp16, [1, 257, ?]> x_11_cast_fp16 = identity(x = var_76_cast_fp16)[name = string("x_11_cast_fp16")];
90
+ bool x_13_transpose_x_0 = const()[name = string("x_13_transpose_x_0"), val = bool(false)];
91
+ bool x_13_transpose_y_0 = const()[name = string("x_13_transpose_y_0"), val = bool(false)];
92
+ tensor<fp16, [1, 128, 257]> const_4_to_fp16 = const()[name = string("const_4_to_fp16"), val = tensor<fp16, [1, 128, 257]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526528)))];
93
+ tensor<fp16, [1, 128, ?]> x_13_cast_fp16 = matmul(transpose_x = x_13_transpose_x_0, transpose_y = x_13_transpose_y_0, x = const_4_to_fp16, y = x_11_cast_fp16)[name = string("x_13_cast_fp16")];
94
+ fp16 var_83_to_fp16 = const()[name = string("op_83_to_fp16"), val = fp16(0x1p-24)];
95
+ tensor<fp16, [1, 128, ?]> var_84_cast_fp16 = add(x = x_13_cast_fp16, y = var_83_to_fp16)[name = string("op_84_cast_fp16")];
96
+ fp32 x_epsilon_0 = const()[name = string("x_epsilon_0"), val = fp32(0x1p-149)];
97
+ tensor<fp16, [1, 128, ?]> x_cast_fp16 = log(epsilon = x_epsilon_0, x = var_84_cast_fp16)[name = string("x_cast_fp16")];
98
+ tensor<int32, [3]> var_86_shape_cast_fp16 = shape(x = x_cast_fp16)[name = string("op_86_shape_cast_fp16")];
99
+ int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)];
100
+ int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)];
101
+ bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)];
102
+ string var_86_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_86_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
103
+ uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(2)];
104
+ tensor<uint16, [3]> var_86_shape_cast_fp16_to_uint16 = cast(dtype = var_86_shape_cast_fp16_to_uint16_dtype_0, x = var_86_shape_cast_fp16)[name = string("cast_11")];
105
+ uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_86_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")];
106
+ string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")];
107
+ int32 const_5 = const()[name = string("const_5"), val = int32(0)];
108
+ int32 const_6 = const()[name = string("const_6"), val = int32(1)];
109
+ int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_10")];
110
+ tensor<int32, [?]> mask_1 = range_1d(end = gather_5_cast_uint16_to_int32, start = const_5, step = const_6)[name = string("mask_1")];
111
+ tensor<int32, [1]> expand_dims_0_axes_0 = const()[name = string("expand_dims_0_axes_0"), val = tensor<int32, [1]>([0])];
112
+ tensor<int32, [1, ?]> expand_dims_0 = expand_dims(axes = expand_dims_0_axes_0, x = mask_1)[name = string("expand_dims_0")];
113
+ tensor<int32, [1]> var_91_axes_0 = const()[name = string("op_91_axes_0"), val = tensor<int32, [1]>([1])];
114
+ tensor<int32, [1, 1]> var_91 = expand_dims(axes = var_91_axes_0, x = mel_length)[name = string("op_91")];
115
+ tensor<bool, [1, ?]> mask = greater_equal(x = expand_dims_0, y = var_91)[name = string("mask")];
116
+ tensor<int32, [1]> var_93_axes_0 = const()[name = string("op_93_axes_0"), val = tensor<int32, [1]>([1])];
117
+ tensor<bool, [1, 1, ?]> var_93 = expand_dims(axes = var_93_axes_0, x = mask)[name = string("op_93")];
118
+ tensor<fp16, [1, 128, ?]> processed_signal_cast_fp16 = select(a = var_16_to_fp16, b = x_cast_fp16, cond = var_93)[name = string("processed_signal_cast_fp16")];
119
+ string processed_signal_cast_fp16_to_fp32_dtype_0 = const()[name = string("processed_signal_cast_fp16_to_fp32_dtype_0"), val = string("fp32")];
120
+ tensor<fp32, [1, 128, ?]> mel = cast(dtype = processed_signal_cast_fp16_to_fp32_dtype_0, x = processed_signal_cast_fp16)[name = string("cast_9")];
121
+ } -> (mel, mel_length);
122
+ }
multilingual/1120ms/preprocessor.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:297514e2b211d14b0e53cb97193d679bb89ead98d28e578f3f1d049ddbcc36b3
3
+ size 592384
multilingual/1120ms/preprocessor.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b636481110d0a364e9d3379470371c0eda62de5eeac1e58ed3e6371c26c14cb3
3
+ size 15878
multilingual/1120ms/preprocessor.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:297514e2b211d14b0e53cb97193d679bb89ead98d28e578f3f1d049ddbcc36b3
3
+ size 592384