bweng committed on
Commit
8f05dca
·
verified ·
1 Parent(s): a0cd34f

Upload 11 files

Browse files
mlpackages/.DS_Store ADDED
Binary file (6.15 kB). View file
 
mlpackages/Decoder.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bfdcef9fb1b46a1ec2f3c1895c74ed90644a69105efda370e412a522bd5b353
3
+ size 11672
mlpackages/Decoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48adf0f0d47c406c8253d4f7fef967436a39da14f5a65e66d5a4b407be355d41
3
+ size 23604992
mlpackages/Decoder.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "466FE08B-3D1B-4E1F-9B79-C18F489CA87E": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Specification",
7
+ "name": "model.mlmodel",
8
+ "path": "com.apple.CoreML/model.mlmodel"
9
+ },
10
+ "D7C703DA-6632-4032-845C-478F29130C0D": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Weights",
13
+ "name": "weights",
14
+ "path": "com.apple.CoreML/weights"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "466FE08B-3D1B-4E1F-9B79-C18F489CA87E"
18
+ }
mlpackages/JointDecision.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:130d2712b827b8356667c02494efd117ac8aec2d1c563f169e9d4d350c346301
3
+ size 8820
mlpackages/JointDecision.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e0e63d840032f7f07ddb1d64446051166281e5491bf22da8a945c41f6eedb3e
3
+ size 12642764
mlpackages/JointDecision.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "4B2544BC-9848-45E5-9B40-BC04D1318223": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Specification",
7
+ "name": "model.mlmodel",
8
+ "path": "com.apple.CoreML/model.mlmodel"
9
+ },
10
+ "8081058E-60BD-4BF1-8E7D-57BF1A46D2C9": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Weights",
13
+ "name": "weights",
14
+ "path": "com.apple.CoreML/weights"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "4B2544BC-9848-45E5-9B40-BC04D1318223"
18
+ }
mlpackages/MelEncoder.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e7ee9681979808688ac650b08349059f0526ebc3bb5ba73cc76e2b414046d72
3
+ size 716613
mlpackages/MelEncoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:351c7a3bee2bf563472a5b09a2dde37b940d59aa23ef39a201c9885c7f010c61
3
+ size 595166912
mlpackages/MelEncoder.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "1ECB2652-D8D4-4534-ABAA-C23A2DC3EE58": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Specification",
7
+ "name": "model.mlmodel",
8
+ "path": "com.apple.CoreML/model.mlmodel"
9
+ },
10
+ "63AFCF12-D757-418B-8A97-9AB86ED23FBE": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Weights",
13
+ "name": "weights",
14
+ "path": "com.apple.CoreML/weights"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "1ECB2652-D8D4-4534-ABAA-C23A2DC3EE58"
18
+ }
mlpackages/metadata.json ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_id": "nvidia/parakeet-tdt-0.6b-v3",
3
+ "sample_rate": 16000,
4
+ "max_audio_seconds": 15.0,
5
+ "max_audio_samples": 240000,
6
+ "max_symbol_steps": 1,
7
+ "vocab_size": 8192,
8
+ "joint_extra_outputs": 5,
9
+ "checkpoint": {
10
+ "type": "pretrained",
11
+ "model_id": "nvidia/parakeet-tdt-0.6b-v3"
12
+ },
13
+ "coreml": {
14
+ "compute_units": "ALL",
15
+ "compute_precision": "FLOAT32"
16
+ },
17
+ "components": {
18
+ "preprocessor": {
19
+ "inputs": {
20
+ "audio_signal": [
21
+ 1,
22
+ 240000
23
+ ],
24
+ "audio_length": [
25
+ 1
26
+ ]
27
+ },
28
+ "outputs": {
29
+ "mel": [
30
+ 1,
31
+ 128,
32
+ 1501
33
+ ],
34
+ "mel_length": [
35
+ 1
36
+ ]
37
+ },
38
+ "path": "parakeet_preprocessor.mlpackage"
39
+ },
40
+ "encoder": {
41
+ "inputs": {
42
+ "mel": [
43
+ 1,
44
+ 128,
45
+ 1501
46
+ ],
47
+ "mel_length": [
48
+ 1
49
+ ]
50
+ },
51
+ "outputs": {
52
+ "encoder": [
53
+ 1,
54
+ 1024,
55
+ 188
56
+ ],
57
+ "encoder_length": [
58
+ 1
59
+ ]
60
+ },
61
+ "path": "parakeet_encoder.mlpackage"
62
+ },
63
+ "mel_encoder": {
64
+ "inputs": {
65
+ "audio_signal": [
66
+ 1,
67
+ 240000
68
+ ],
69
+ "audio_length": [
70
+ 1
71
+ ]
72
+ },
73
+ "outputs": {
74
+ "encoder": [
75
+ 1,
76
+ 1024,
77
+ 188
78
+ ],
79
+ "encoder_length": [
80
+ 1
81
+ ]
82
+ },
83
+ "path": "parakeet_mel_encoder.mlpackage"
84
+ },
85
+ "decoder": {
86
+ "inputs": {
87
+ "targets": [
88
+ 1,
89
+ 1
90
+ ],
91
+ "target_length": [
92
+ 1
93
+ ],
94
+ "h_in": [
95
+ 2,
96
+ 1,
97
+ 640
98
+ ],
99
+ "c_in": [
100
+ 2,
101
+ 1,
102
+ 640
103
+ ]
104
+ },
105
+ "outputs": {
106
+ "decoder": [
107
+ 1,
108
+ 640,
109
+ 1
110
+ ],
111
+ "h_out": [
112
+ 2,
113
+ 1,
114
+ 640
115
+ ],
116
+ "c_out": [
117
+ 2,
118
+ 1,
119
+ 640
120
+ ]
121
+ },
122
+ "path": "parakeet_decoder.mlpackage"
123
+ },
124
+ "joint": {
125
+ "inputs": {
126
+ "encoder": [
127
+ 1,
128
+ 1024,
129
+ 188
130
+ ],
131
+ "decoder": [
132
+ 1,
133
+ 640,
134
+ 1
135
+ ]
136
+ },
137
+ "outputs": {
138
+ "logits": [
139
+ 1,
140
+ 188,
141
+ 1,
142
+ 8198
143
+ ]
144
+ },
145
+ "path": "parakeet_joint.mlpackage"
146
+ },
147
+ "joint_decision": {
148
+ "inputs": {
149
+ "encoder": [
150
+ 1,
151
+ 1024,
152
+ 188
153
+ ],
154
+ "decoder": [
155
+ 1,
156
+ 640,
157
+ 1
158
+ ]
159
+ },
160
+ "outputs": {
161
+ "token_id": [
162
+ 1,
163
+ 1024,
164
+ 640
165
+ ],
166
+ "token_prob": [
167
+ 1,
168
+ 1024,
169
+ 640
170
+ ],
171
+ "duration": [
172
+ 1,
173
+ 1024,
174
+ 640
175
+ ]
176
+ },
177
+ "path": "parakeet_joint_decision.mlpackage"
178
+ },
179
+ "joint_decision_single_step": {
180
+ "inputs": {
181
+ "encoder_step": [
182
+ 1,
183
+ 1024,
184
+ 1
185
+ ],
186
+ "decoder_step": [
187
+ 1,
188
+ 640,
189
+ 1
190
+ ]
191
+ },
192
+ "outputs": {
193
+ "token_id": [
194
+ 1,
195
+ 1,
196
+ 1
197
+ ],
198
+ "token_prob": [
199
+ 1,
200
+ 1,
201
+ 1
202
+ ],
203
+ "duration": [
204
+ 1,
205
+ 1,
206
+ 1
207
+ ]
208
+ },
209
+ "path": "parakeet_joint_decision_single_step.mlpackage"
210
+ }
211
+ },
212
+ "validation": {
213
+ "requested": true,
214
+ "status": "ok",
215
+ "atol": 0.0001,
216
+ "rtol": 0.001,
217
+ "symbol_steps": 32,
218
+ "audio_path": null,
219
+ "components": {
220
+ "preprocessor": {
221
+ "mel": {
222
+ "max_abs": 0.48384106159210205,
223
+ "max_rel": 1.9971061944961548,
224
+ "match": true
225
+ },
226
+ "length_match": true,
227
+ "latency": {
228
+ "runs": 5,
229
+ "warmup": 2,
230
+ "torch_ms": {
231
+ "mean": 1.9939915742725134,
232
+ "std": 0.07679941425541917
233
+ },
234
+ "coreml_ms": {
235
+ "mean": 1.1883336119353771,
236
+ "std": 0.06561290487126992
237
+ },
238
+ "rtf": {
239
+ "torch": 0.00013293277161816755,
240
+ "coreml": 7.922224079569181e-05
241
+ }
242
+ },
243
+ "plots": {
244
+ "mel_composite.png": "mel_composite.png"
245
+ }
246
+ },
247
+ "encoder": {
248
+ "encoder": {
249
+ "max_abs": 0.005408093333244324,
250
+ "max_rel": 1.9989821910858154,
251
+ "match": true
252
+ },
253
+ "length_match": true,
254
+ "latency": {
255
+ "runs": 5,
256
+ "warmup": 2,
257
+ "torch_ms": {
258
+ "mean": 1030.481566535309,
259
+ "std": 61.12092100354026
260
+ },
261
+ "coreml_ms": {
262
+ "mean": 25.444025173783302,
263
+ "std": 0.019404912635624456
264
+ },
265
+ "rtf": {
266
+ "torch": 0.06869877110235394,
267
+ "coreml": 0.0016962683449188868
268
+ }
269
+ },
270
+ "plots": {
271
+ "encoder_time_l2.png": "encoder_time_l2.png"
272
+ }
273
+ },
274
+ "decoder": {
275
+ "decoder": {
276
+ "max_abs": 0.01992393285036087,
277
+ "max_rel": 1.9797950983047485,
278
+ "match": true
279
+ },
280
+ "h_out": {
281
+ "max_abs": 0.048870980739593506,
282
+ "max_rel": 1.6649388074874878,
283
+ "match": true
284
+ },
285
+ "c_out": {
286
+ "max_abs": 0.7885193824768066,
287
+ "max_rel": 1.666900634765625,
288
+ "match": true
289
+ },
290
+ "latency": {
291
+ "runs": 5,
292
+ "warmup": 2,
293
+ "torch_ms": {
294
+ "mean": 7.512458227574825,
295
+ "std": 0.14005641459625567
296
+ },
297
+ "coreml_ms": {
298
+ "mean": 4.324891557916999,
299
+ "std": 0.10921706592501401
300
+ },
301
+ "rtf": {
302
+ "torch": 0.0005008305485049884,
303
+ "coreml": 0.0002883261038611333
304
+ }
305
+ },
306
+ "plots": {
307
+ "decoder_steps_l2.png": "decoder_steps_l2.png"
308
+ }
309
+ },
310
+ "joint": {
311
+ "logits": {
312
+ "max_abs": 0.099273681640625,
313
+ "max_rel": 1.917771577835083,
314
+ "match": true
315
+ },
316
+ "latency": {
317
+ "runs": 5,
318
+ "warmup": 2,
319
+ "torch_ms": {
320
+ "mean": 28.3439417835325,
321
+ "std": 0.265091042836136
322
+ },
323
+ "coreml_ms": {
324
+ "mean": 22.657233430072665,
325
+ "std": 0.20110920609593658
326
+ },
327
+ "rtf": {
328
+ "torch": 0.0018895961189021667,
329
+ "coreml": 0.001510482228671511
330
+ }
331
+ },
332
+ "plots": {
333
+ "joint_top50.png": "joint_top50.png",
334
+ "joint_time_l2.png": "joint_time_l2.png"
335
+ }
336
+ },
337
+ "mel_encoder": {
338
+ "encoder": {
339
+ "max_abs": 0.010630208998918533,
340
+ "max_rel": 1.9964393377304077,
341
+ "match": true
342
+ },
343
+ "length_match": true,
344
+ "vs_separate_coreml": {
345
+ "max_abs": 0.0089111328125,
346
+ "max_rel": 2.0,
347
+ "match": true
348
+ },
349
+ "latency": {
350
+ "runs": 5,
351
+ "warmup": 2,
352
+ "fused_coreml_ms": {
353
+ "mean": 27.10114144720137,
354
+ "std": 0.06494816293632807
355
+ },
356
+ "separate_coreml_ms": {
357
+ "mean": 26.63235878571868,
358
+ "std": 0.06842224725951737
359
+ },
360
+ "separate_torch_ms": {
361
+ "mean": 1032.4755581095815,
362
+ "std": 61.120969253367036
363
+ },
364
+ "rtf": {
365
+ "fused_coreml": 0.001806742763146758,
366
+ "separate_coreml": 0.0017754905857145786,
367
+ "separate_torch": 0.0688317038739721
368
+ }
369
+ },
370
+ "plots": {
371
+ "mel_encoder_time_l2.png": "mel_encoder_time_l2.png"
372
+ }
373
+ },
374
+ "joint_decision": {
375
+ "vs_torch_cpu": {
376
+ "token_id": {
377
+ "max_abs": 7954.0,
378
+ "max_rel": 0.970947265625,
379
+ "match": false
380
+ },
381
+ "token_prob": {
382
+ "max_abs": 0.006797075271606445,
383
+ "max_rel": 0.013232328929007053,
384
+ "match": true
385
+ },
386
+ "duration": {
387
+ "max_abs": 1.0,
388
+ "max_rel": 1.0,
389
+ "match": false
390
+ }
391
+ },
392
+ "vs_coreml_joint_cpu": {
393
+ "token_id": {
394
+ "max_abs": 0.0,
395
+ "max_rel": 0.0,
396
+ "match": true
397
+ },
398
+ "token_prob": {
399
+ "max_abs": 0.0002440810203552246,
400
+ "max_rel": 0.0004735384718514979,
401
+ "match": true
402
+ },
403
+ "duration": {
404
+ "max_abs": 0.0,
405
+ "max_rel": 0.0,
406
+ "match": true
407
+ }
408
+ },
409
+ "latency": {
410
+ "runs": 5,
411
+ "warmup": 2,
412
+ "fused_coreml_ms": {
413
+ "mean": 64.08805842511356,
414
+ "std": 0.596408636567345
415
+ },
416
+ "separate_joint_coreml_plus_cpu_ms": {
417
+ "mean": 50.046625174582005,
418
+ "std": 1.0175464926370517
419
+ },
420
+ "rtf": {
421
+ "fused_coreml": 0.004272537228340904,
422
+ "separate_joint_coreml_plus_cpu": 0.003336441678305467
423
+ }
424
+ },
425
+ "plots": {
426
+ "joint_decision_prob_u0.png": "joint_decision_prob_u0.png",
427
+ "joint_decision_token_agree.png": "joint_decision_token_agree.png"
428
+ }
429
+ }
430
+ }
431
+ }
432
+ }