oraculumai committed on
Commit
dbfcd29
·
verified ·
1 Parent(s): d0c04ab

Add 14ch EPOC X Core ML profile (fp16) + parity artifacts

Browse files
.gitattributes CHANGED
@@ -4,5 +4,17 @@
4
  profiles/14ch/fp16/ZunaDecoderStep.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
5
  profiles/14ch/fp16/ZunaDecoderStepUpdate.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
6
  profiles/14ch/fp16/ZunaEncoder.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
7
  profiles/validation/parity_summary.png filter=lfs diff=lfs merge=lfs -text
8
  profiles/validation/waveform_overlay_residual.png filter=lfs diff=lfs merge=lfs -text
 
4
  profiles/14ch/fp16/ZunaDecoderStep.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
5
  profiles/14ch/fp16/ZunaDecoderStepUpdate.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
6
  profiles/14ch/fp16/ZunaEncoder.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
7
+ profiles/16ch/fp16/ZunaDecoderStep.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
8
+ profiles/16ch/fp16/ZunaDecoderStepUpdate.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
9
+ profiles/16ch/fp16/ZunaEncoder.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
10
+ profiles/32ch/fp16/ZunaDecoderStep.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
11
+ profiles/32ch/fp16/ZunaDecoderStepUpdate.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ profiles/32ch/fp16/ZunaEncoder.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
13
+ profiles/64ch/fp16/ZunaDecoderStep.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
14
+ profiles/64ch/fp16/ZunaDecoderStepUpdate.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
15
+ profiles/64ch/fp16/ZunaEncoder.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
16
+ profiles/64ch/fp32/ZunaDecoderStep.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
17
+ profiles/64ch/fp32/ZunaDecoderStepUpdate.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
18
+ profiles/64ch/fp32/ZunaEncoder.mlpackage/Data/com.apple.CoreML/model.mlmodel filter=lfs diff=lfs merge=lfs -text
19
  profiles/validation/parity_summary.png filter=lfs diff=lfs merge=lfs -text
20
  profiles/validation/waveform_overlay_residual.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -61,6 +61,10 @@ These assumptions are what the released pretrained weights were optimized for.
61
  Profile artifacts are organized as:
62
 
63
  - `profiles/14ch/fp16/...`
 
 
 
 
64
 
65
  Each profile contains:
66
 
@@ -82,6 +86,10 @@ Use `DecoderStepUpdate` when you want a minimal host-side loop and fewer host te
82
  | Profile | Channels | Precision | Token Count | Encoder/Decoder Tensor Shape | final-z rel_l2 vs PyTorch |
83
  |---|---:|---|---:|---|---:|
84
  | `14ch-fp16` | 14 | `fp16` | 560 | `[1, 560, 32]` | 0.011380 |
 
 
 
 
85
 
86
  See `profiles/index.json` for machine-readable profile discovery.
87
 
@@ -92,6 +100,10 @@ All published profiles are checked against the original PyTorch weights using a
92
  | Profile | MAE | RMSE | max_abs | rel_l2 | Threshold | Gate |
93
  |---|---:|---:|---:|---:|---:|---|
94
  | `14ch-fp16` | 0.006258 | 0.010665 | 0.193008 | 0.011380 | 0.012000 | **PASS** |
 
 
 
 
95
 
96
  ### Parity Visualization
97
 
 
61
  Profile artifacts are organized as:
62
 
63
  - `profiles/14ch/fp16/...`
64
+ - `profiles/16ch/fp16/...`
65
+ - `profiles/32ch/fp16/...`
66
+ - `profiles/64ch/fp16/...`
67
+ - `profiles/64ch/fp32/...`
68
 
69
  Each profile contains:
70
 
 
86
  | Profile | Channels | Precision | Token Count | Encoder/Decoder Tensor Shape | final-z rel_l2 vs PyTorch |
87
  |---|---:|---|---:|---|---:|
88
  | `14ch-fp16` | 14 | `fp16` | 560 | `[1, 560, 32]` | 0.011380 |
89
+ | `16ch-fp16` | 16 | `fp16` | 640 | `[1, 640, 32]` | 0.006580 |
90
+ | `32ch-fp16` | 32 | `fp16` | 1280 | `[1, 1280, 32]` | 0.005629 |
91
+ | `64ch-fp16` | 64 | `fp16` | 2560 | `[1, 2560, 32]` | 0.004366 |
92
+ | `64ch-fp32` | 64 | `fp32` | 2560 | `[1, 2560, 32]` | 0.000002 |
93
 
94
  See `profiles/index.json` for machine-readable profile discovery.
95
 
 
100
  | Profile | MAE | RMSE | max_abs | rel_l2 | Threshold | Gate |
101
  |---|---:|---:|---:|---:|---:|---|
102
  | `14ch-fp16` | 0.006258 | 0.010665 | 0.193008 | 0.011380 | 0.012000 | **PASS** |
103
+ | `16ch-fp16` | 0.004843 | 0.006150 | 0.057458 | 0.006580 | 0.010000 | **PASS** |
104
+ | `32ch-fp16` | 0.004189 | 0.005253 | 0.020710 | 0.005629 | 0.010000 | **PASS** |
105
+ | `64ch-fp16` | 0.003265 | 0.004077 | 0.018174 | 0.004366 | 0.010000 | **PASS** |
106
+ | `64ch-fp32` | 0.000001 | 0.000001 | 0.000011 | 0.000002 | 0.005000 | **PASS** |
107
 
108
  ### Parity Visualization
109
 
profiles/index.json CHANGED
@@ -1,6 +1,4 @@
1
  {
2
- "format_version": 1,
3
- "layout": "profiles",
4
  "profiles": [
5
  {
6
  "profile_id": "14ch-fp16",
@@ -16,6 +14,66 @@
16
  "decoder_step_coreml": "ZunaDecoderStep.mlpackage",
17
  "decoder_step_update_coreml": "ZunaDecoderStepUpdate.mlpackage"
18
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  ]
21
- }
 
1
  {
 
 
2
  "profiles": [
3
  {
4
  "profile_id": "14ch-fp16",
 
14
  "decoder_step_coreml": "ZunaDecoderStep.mlpackage",
15
  "decoder_step_update_coreml": "ZunaDecoderStepUpdate.mlpackage"
16
  }
17
+ },
18
+ {
19
+ "profile_id": "16ch-fp16",
20
+ "channel_count": 16,
21
+ "precision": "fp16",
22
+ "token_count": 640,
23
+ "profile_dir": "16ch/fp16",
24
+ "artifacts": {
25
+ "encoder_torchscript": "ZunaEncoder.pt",
26
+ "decoder_step_torchscript": "ZunaDecoderStep.pt",
27
+ "decoder_step_update_torchscript": "ZunaDecoderStepUpdate.pt",
28
+ "encoder_coreml": "ZunaEncoder.mlpackage",
29
+ "decoder_step_coreml": "ZunaDecoderStep.mlpackage",
30
+ "decoder_step_update_coreml": "ZunaDecoderStepUpdate.mlpackage"
31
+ }
32
+ },
33
+ {
34
+ "profile_id": "32ch-fp16",
35
+ "channel_count": 32,
36
+ "precision": "fp16",
37
+ "token_count": 1280,
38
+ "profile_dir": "32ch/fp16",
39
+ "artifacts": {
40
+ "encoder_torchscript": "ZunaEncoder.pt",
41
+ "decoder_step_torchscript": "ZunaDecoderStep.pt",
42
+ "decoder_step_update_torchscript": "ZunaDecoderStepUpdate.pt",
43
+ "encoder_coreml": "ZunaEncoder.mlpackage",
44
+ "decoder_step_coreml": "ZunaDecoderStep.mlpackage",
45
+ "decoder_step_update_coreml": "ZunaDecoderStepUpdate.mlpackage"
46
+ }
47
+ },
48
+ {
49
+ "profile_id": "64ch-fp16",
50
+ "channel_count": 64,
51
+ "precision": "fp16",
52
+ "token_count": 2560,
53
+ "profile_dir": "64ch/fp16",
54
+ "artifacts": {
55
+ "encoder_torchscript": "ZunaEncoder.pt",
56
+ "decoder_step_torchscript": "ZunaDecoderStep.pt",
57
+ "decoder_step_update_torchscript": "ZunaDecoderStepUpdate.pt",
58
+ "encoder_coreml": "ZunaEncoder.mlpackage",
59
+ "decoder_step_coreml": "ZunaDecoderStep.mlpackage",
60
+ "decoder_step_update_coreml": "ZunaDecoderStepUpdate.mlpackage"
61
+ }
62
+ },
63
+ {
64
+ "profile_id": "64ch-fp32",
65
+ "channel_count": 64,
66
+ "precision": "fp32",
67
+ "token_count": 2560,
68
+ "profile_dir": "64ch/fp32",
69
+ "artifacts": {
70
+ "encoder_torchscript": "ZunaEncoder.pt",
71
+ "decoder_step_torchscript": "ZunaDecoderStep.pt",
72
+ "decoder_step_update_torchscript": "ZunaDecoderStepUpdate.pt",
73
+ "encoder_coreml": "ZunaEncoder.mlpackage",
74
+ "decoder_step_coreml": "ZunaDecoderStep.mlpackage",
75
+ "decoder_step_update_coreml": "ZunaDecoderStepUpdate.mlpackage"
76
+ }
77
  }
78
  ]
79
+ }
profiles/parity_report.json CHANGED
@@ -1,8 +1,6 @@
1
  {
2
- "thresholds": {
3
- "fp16_rel_l2": 0.012,
4
- "fp32_rel_l2": 0.005
5
- },
6
  "reports": [
7
  {
8
  "profile_id": "14ch-fp16",
@@ -36,9 +34,138 @@
36
  },
37
  "threshold_rel_l2": 0.012,
38
  "pass": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  }
40
- ],
41
- "all_passed": true,
42
- "failed_profiles": [],
43
- "waveform_preview_file": "parity_waveform_preview.json"
44
- }
 
1
  {
2
+ "all_passed": true,
3
+ "failed_profiles": [],
 
 
4
  "reports": [
5
  {
6
  "profile_id": "14ch-fp16",
 
34
  },
35
  "threshold_rel_l2": 0.012,
36
  "pass": true
37
+ },
38
+ {
39
+ "profile_id": "16ch-fp16",
40
+ "channel_count": 16,
41
+ "precision": "fp16",
42
+ "token_count": 640,
43
+ "sample_steps": 20,
44
+ "encoder": {
45
+ "mae": 0.006940255872905254,
46
+ "rmse": 0.008843358606100082,
47
+ "max_abs": 0.03045654296875,
48
+ "rel_l2": 0.01402380783110857
49
+ },
50
+ "final_z_step_loop": {
51
+ "mae": 0.0048432364128530025,
52
+ "rmse": 0.006149778142571449,
53
+ "max_abs": 0.05745832622051239,
54
+ "rel_l2": 0.006579744163900614
55
+ },
56
+ "final_z_update_loop": {
57
+ "mae": 0.0049058133736252785,
58
+ "rmse": 0.006234212778508663,
59
+ "max_abs": 0.057101115584373474,
60
+ "rel_l2": 0.006670081987977028
61
+ },
62
+ "coreml_consistency": {
63
+ "mae": 0.0006364885484799743,
64
+ "rmse": 0.0010369644733145833,
65
+ "max_abs": 0.009854316711425781,
66
+ "rel_l2": 0.001108274213038385
67
+ },
68
+ "threshold_rel_l2": 0.01,
69
+ "pass": true
70
+ },
71
+ {
72
+ "profile_id": "32ch-fp16",
73
+ "channel_count": 32,
74
+ "precision": "fp16",
75
+ "token_count": 1280,
76
+ "sample_steps": 20,
77
+ "encoder": {
78
+ "mae": 0.0068974122405052185,
79
+ "rmse": 0.00882513914257288,
80
+ "max_abs": 0.03042382001876831,
81
+ "rel_l2": 0.014106319285929203
82
+ },
83
+ "final_z_step_loop": {
84
+ "mae": 0.004189030732959509,
85
+ "rmse": 0.005252954084426165,
86
+ "max_abs": 0.020709753036499023,
87
+ "rel_l2": 0.0056286221370100975
88
+ },
89
+ "final_z_update_loop": {
90
+ "mae": 0.004258580505847931,
91
+ "rmse": 0.005357986781746149,
92
+ "max_abs": 0.025191545486450195,
93
+ "rel_l2": 0.005741165950894356
94
+ },
95
+ "coreml_consistency": {
96
+ "mae": 0.0006323954439722002,
97
+ "rmse": 0.0010173922637477517,
98
+ "max_abs": 0.014103889465332031,
99
+ "rel_l2": 0.001088618068024516
100
+ },
101
+ "threshold_rel_l2": 0.01,
102
+ "pass": true
103
+ },
104
+ {
105
+ "profile_id": "64ch-fp16",
106
+ "channel_count": 64,
107
+ "precision": "fp16",
108
+ "token_count": 2560,
109
+ "sample_steps": 20,
110
+ "encoder": {
111
+ "mae": 0.00757429888471961,
112
+ "rmse": 0.009730321355164051,
113
+ "max_abs": 0.03688855469226837,
114
+ "rel_l2": 0.015860911458730698
115
+ },
116
+ "final_z_step_loop": {
117
+ "mae": 0.003264558967202902,
118
+ "rmse": 0.004077223129570484,
119
+ "max_abs": 0.018174409866333008,
120
+ "rel_l2": 0.004365743603557348
121
+ },
122
+ "final_z_update_loop": {
123
+ "mae": 0.0033559747971594334,
124
+ "rmse": 0.004213370848447084,
125
+ "max_abs": 0.02053523063659668,
126
+ "rel_l2": 0.004511525854468346
127
+ },
128
+ "coreml_consistency": {
129
+ "mae": 0.0006285925628617406,
130
+ "rmse": 0.0010274079395458102,
131
+ "max_abs": 0.012227773666381836,
132
+ "rel_l2": 0.0010984123218804598
133
+ },
134
+ "threshold_rel_l2": 0.01,
135
+ "pass": true
136
+ },
137
+ {
138
+ "profile_id": "64ch-fp32",
139
+ "channel_count": 64,
140
+ "precision": "fp32",
141
+ "token_count": 2560,
142
+ "sample_steps": 20,
143
+ "encoder": {
144
+ "mae": 3.0412613796215737e-06,
145
+ "rmse": 4.264039034751477e-06,
146
+ "max_abs": 2.333521842956543e-05,
147
+ "rel_l2": 6.950593615329126e-06
148
+ },
149
+ "final_z_step_loop": {
150
+ "mae": 1.130579448727076e-06,
151
+ "rmse": 1.4336205822473858e-06,
152
+ "max_abs": 1.0848045349121094e-05,
153
+ "rel_l2": 1.5350690318882698e-06
154
+ },
155
+ "final_z_update_loop": {
156
+ "mae": 1.130579448727076e-06,
157
+ "rmse": 1.4336205822473858e-06,
158
+ "max_abs": 1.0848045349121094e-05,
159
+ "rel_l2": 1.5350690318882698e-06
160
+ },
161
+ "coreml_consistency": {
162
+ "mae": 0.0,
163
+ "rmse": 0.0,
164
+ "max_abs": 0.0,
165
+ "rel_l2": 0.0
166
+ },
167
+ "threshold_rel_l2": 0.005,
168
+ "pass": true
169
  }
170
+ ]
171
+ }
 
 
 
profiles/validation/parity_summary.png CHANGED

Git LFS Details

  • SHA256: f11428295ec38b96755e33c731ea08d881abe94ac00460a3866fda387eb5d5a1
  • Pointer size: 130 Bytes
  • Size of remote file: 77.4 kB

Git LFS Details

  • SHA256: 9f8ac94db48ba36c45527b79e44d6425f02e2505ec1c7625021062b4e8b6bc0e
  • Pointer size: 131 Bytes
  • Size of remote file: 129 kB