Yehor committed on
Commit
ae545bd
·
verified ·
1 Parent(s): 2d521d8

Upload checkpoints epoch_0003_step_00060000

Browse files

Automatic checkpoint upload from train.py.

Report stem: epoch_0003_step_00060000
Epoch: 3
Global step: 60000

.hf_upload_sync/validation_epoch_0003_step_00050000.done ADDED
@@ -0,0 +1 @@
 
 
1
+ 1776346794.6550694
.hf_upload_sync/validation_epoch_0003_step_00060000.started ADDED
@@ -0,0 +1 @@
 
 
1
+ 1776376752.642862
checkpoint_best.json CHANGED
@@ -117,125 +117,125 @@
117
  "padding_value": 1.0
118
  },
119
  "epoch": 3,
120
- "global_step": 50000,
121
- "best_val_wer": 0.20322991179181113,
122
  "metrics": {
123
  "epoch": 3,
124
- "global_step": 50000,
125
- "train_loss": 0.5760795246486672,
126
- "train_main_ctc_loss": 0.5760795246486672,
127
  "train_aed_loss": 0.0,
128
  "train_liberta_distill_loss": 0.0,
129
  "train_audio_teacher_loss": 0.0,
130
- "val_loss": 0.33544471835922196,
131
- "val_cer": 0.040319756478638,
132
- "val_wer": 0.20322991179181113,
133
- "val_forward_seconds": 342.28896363277454,
134
  "val_teacher_seconds": 0.0,
135
- "val_decode_seconds": 1.0826134745730087,
136
- "val_gather_seconds": 0.02164362499024719,
137
  "val_model_source": "raw",
138
- "val_main_ctc_loss": 0.33544471835922196,
139
  "val_aed_loss": 0.0,
140
  "val_liberta_distill_loss": 0.0,
141
  "val_audio_teacher_loss": 0.0,
142
- "val_avg_blank_probability": 0.7951695728016561,
143
- "val_argmax_blank_fraction": 0.8015845900661168,
144
- "val_avg_top_nonblank_probability": 0.19574899347442457,
145
  "val_avg_output_frames": 129.72159800249688,
146
  "val_avg_target_tokens": 24.885455680399502,
147
  "val_target_tokens_per_frame": 0.1918374123013849,
148
  "val_impossible_sample_fraction": 0.0,
149
  "val_tight_sample_fraction": 0.0,
150
  "val_samples_short": 1183.0,
151
- "val_wer_short": 0.25123263110712685,
152
- "val_cer_short": 0.04994360637247991,
153
  "val_samples_medium": 2021.0,
154
- "val_wer_medium": 0.19154160982264665,
155
- "val_cer_medium": 0.038046554996836394,
156
  "val_samples_long": 0.0,
157
  "val_decoded_empty_fraction": 0.0,
158
- "val_decoded_avg_char_length": 46.21129837702871,
159
- "val_decoded_avg_word_length": 7.056179775280899,
160
  "val_speaker_count": 0.0,
161
  "val_speaker_macro_wer": 0.0,
162
  "val_speaker_id_available": 0.0,
163
  "val_missing_speaker_id_samples": 3204.0,
164
- "val_hardest_example_0_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3180.wav",
165
  "val_hardest_example_0_speaker": "",
166
- "val_hardest_example_0_ref": "Яка?",
167
- "val_hardest_example_0_hyp": "Яке?",
168
- "val_hardest_example_1_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3561.wav",
169
  "val_hardest_example_1_speaker": "",
170
- "val_hardest_example_1_ref": "Двадцять три?",
171
- "val_hardest_example_1_hyp": "Двадцять три.",
172
- "val_hardest_example_2_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/814.wav",
173
  "val_hardest_example_2_speaker": "",
174
- "val_hardest_example_2_ref": "Піди до Ждана.",
175
- "val_hardest_example_2_hyp": "Піде до ждана.",
176
- "val_hardest_example_3_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2958.wav",
177
  "val_hardest_example_3_speaker": "",
178
- "val_hardest_example_3_ref": "Ходімо?",
179
- "val_hardest_example_3_hyp": "Ходімо.",
180
- "val_hardest_example_4_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3273.wav",
181
  "val_hardest_example_4_speaker": "",
182
- "val_hardest_example_4_ref": "І за мене часто б'ються.",
183
- "val_hardest_example_4_hyp": "Й за мене часто б'ються.",
184
- "val_random_example_0_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3097.wav",
185
  "val_random_example_0_speaker": "",
186
- "val_random_example_0_ref": "Ніхто про це нині не хоче думати.",
187
- "val_random_example_0_hyp": "Ніхто про це нині не хоче думати.",
188
- "val_random_example_1_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2797.wav",
189
  "val_random_example_1_speaker": "",
190
- "val_random_example_1_ref": "Старий почав крутити головою.",
191
- "val_random_example_1_hyp": "Старий почав крутити головою.",
192
- "val_random_example_2_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/953.wav",
193
  "val_random_example_2_speaker": "",
194
- "val_random_example_2_ref": "А що сталося, Андрію?",
195
- "val_random_example_2_hyp": "А що сталося, Андрію?",
196
- "val_random_example_3_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3112.wav",
197
  "val_random_example_3_speaker": "",
198
- "val_random_example_3_ref": "Застосування мов в Україні гарантується Конституцією України та визначається законом.",
199
- "val_random_example_3_hyp": "Застосування мов в Україні гарантується конституцію України та визначається законом.",
200
  "val_random_example_4_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2548.wav",
201
  "val_random_example_4_speaker": "",
202
  "val_random_example_4_ref": "Едгар Дега всесвітньо відомий художник, який зображав на полотнах українок.",
203
- "val_random_example_4_hyp": "Едгардега всесвітньо відомий художник, який зображав на полотнах українок.",
204
- "val/loss": 0.33544471835922196,
205
- "val/cer": 0.040319756478638,
206
- "val/wer": 0.20322991179181113,
207
- "val/main_ctc_loss": 0.33544471835922196,
208
  "val/aed_loss": 0.0,
209
  "val/liberta_distill_loss": 0.0,
210
  "val/audio_teacher_loss": 0.0,
211
- "val/avg_blank_probability": 0.7951695728016561,
212
- "val/argmax_blank_fraction": 0.8015845900661168,
213
- "val/avg_top_nonblank_probability": 0.19574899347442457,
214
  "val/avg_output_frames": 129.72159800249688,
215
  "val/avg_target_tokens": 24.885455680399502,
216
  "val/target_tokens_per_frame": 0.1918374123013849,
217
  "val/impossible_sample_fraction": 0.0,
218
  "val/tight_sample_fraction": 0.0,
219
  "val/samples_short": 1183.0,
220
- "val/wer_short": 0.25123263110712685,
221
- "val/cer_short": 0.04994360637247991,
222
  "val/samples_medium": 2021.0,
223
- "val/wer_medium": 0.19154160982264665,
224
- "val/cer_medium": 0.038046554996836394,
225
  "val/samples_long": 0.0,
226
  "val/decoded_empty_fraction": 0.0,
227
- "val/decoded_avg_char_length": 46.21129837702871,
228
- "val/decoded_avg_word_length": 7.056179775280899,
229
  "val/speaker_count": 0.0,
230
  "val/speaker_macro_wer": 0.0,
231
  "val/speaker_id_available": 0.0,
232
  "val/missing_speaker_id_samples": 3204.0,
233
- "val/forward_seconds": 342.28896363277454,
234
  "val/teacher_seconds": 0.0,
235
- "val/decode_seconds": 1.0826134745730087,
236
- "val/gather_seconds": 0.02164362499024719,
237
- "train/loss": 0.5760795246486672,
238
- "train/main_ctc_loss": 0.5760795246486672,
239
  "train/aed_loss": 0.0,
240
  "train/liberta_distill_loss": 0.0,
241
  "train/audio_teacher_loss": 0.0
 
117
  "padding_value": 1.0
118
  },
119
  "epoch": 3,
120
+ "global_step": 60000,
121
+ "best_val_wer": 0.2002018694869882,
122
  "metrics": {
123
  "epoch": 3,
124
+ "global_step": 60000,
125
+ "train_loss": 0.5716550064133741,
126
+ "train_main_ctc_loss": 0.5716550064133741,
127
  "train_aed_loss": 0.0,
128
  "train_liberta_distill_loss": 0.0,
129
  "train_audio_teacher_loss": 0.0,
130
+ "val_loss": 0.3482800841615762,
131
+ "val_cer": 0.04063628037282474,
132
+ "val_wer": 0.2002018694869882,
133
+ "val_forward_seconds": 332.3660257119918,
134
  "val_teacher_seconds": 0.0,
135
+ "val_decode_seconds": 1.0858283494599164,
136
+ "val_gather_seconds": 0.020539424964226782,
137
  "val_model_source": "raw",
138
+ "val_main_ctc_loss": 0.3482800841615762,
139
  "val_aed_loss": 0.0,
140
  "val_liberta_distill_loss": 0.0,
141
  "val_audio_teacher_loss": 0.0,
142
+ "val_avg_blank_probability": 0.7947598638265351,
143
+ "val_argmax_blank_fraction": 0.7999461056521697,
144
+ "val_avg_top_nonblank_probability": 0.19575466599673175,
145
  "val_avg_output_frames": 129.72159800249688,
146
  "val_avg_target_tokens": 24.885455680399502,
147
  "val_target_tokens_per_frame": 0.1918374123013849,
148
  "val_impossible_sample_fraction": 0.0,
149
  "val_tight_sample_fraction": 0.0,
150
  "val_samples_short": 1183.0,
151
+ "val_wer_short": 0.2436127297176154,
152
+ "val_cer_short": 0.05026082052728042,
153
  "val_samples_medium": 2021.0,
154
+ "val_wer_medium": 0.18963165075034105,
155
+ "val_cer_medium": 0.03836291584801359,
156
  "val_samples_long": 0.0,
157
  "val_decoded_empty_fraction": 0.0,
158
+ "val_decoded_avg_char_length": 46.30867665418227,
159
+ "val_decoded_avg_word_length": 7.063670411985019,
160
  "val_speaker_count": 0.0,
161
  "val_speaker_macro_wer": 0.0,
162
  "val_speaker_id_available": 0.0,
163
  "val_missing_speaker_id_samples": 3204.0,
164
+ "val_hardest_example_0_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2395.wav",
165
  "val_hardest_example_0_speaker": "",
166
+ "val_hardest_example_0_ref": "Я прошу секунду уваги.",
167
+ "val_hardest_example_0_hyp": "Я прошу складду уваги.",
168
+ "val_hardest_example_1_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/662.wav",
169
  "val_hardest_example_1_speaker": "",
170
+ "val_hardest_example_1_ref": "Тире?",
171
+ "val_hardest_example_1_hyp": "Тира.",
172
+ "val_hardest_example_2_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/543.wav",
173
  "val_hardest_example_2_speaker": "",
174
+ "val_hardest_example_2_ref": "А ви спробуйте.",
175
+ "val_hardest_example_2_hyp": "А ви спробуйте?",
176
+ "val_hardest_example_3_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3426.wav",
177
  "val_hardest_example_3_speaker": "",
178
+ "val_hardest_example_3_ref": "Украли щось?",
179
+ "val_hardest_example_3_hyp": "Украли щось.",
180
+ "val_hardest_example_4_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2465.wav",
181
  "val_hardest_example_4_speaker": "",
182
+ "val_hardest_example_4_ref": "Але ви?",
183
+ "val_hardest_example_4_hyp": "Але ви.",
184
+ "val_random_example_0_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/592.wav",
185
  "val_random_example_0_speaker": "",
186
+ "val_random_example_0_ref": "Але не будеш і Гатиловою, відповів їй Валтарій Аквітанський.",
187
+ "val_random_example_0_hyp": "Але не будеш І Гатиловою, відповів її Валтарій Аквітанський",
188
+ "val_random_example_1_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3190.wav",
189
  "val_random_example_1_speaker": "",
190
+ "val_random_example_1_ref": "А ти, княже?",
191
+ "val_random_example_1_hyp": "А ти, княже.",
192
+ "val_random_example_2_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/1073.wav",
193
  "val_random_example_2_speaker": "",
194
+ "val_random_example_2_ref": "Так, як і була.",
195
+ "val_random_example_2_hyp": "Так, як і була.",
196
+ "val_random_example_3_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2130.wav",
197
  "val_random_example_3_speaker": "",
198
+ "val_random_example_3_ref": "Людина конечна і за своєю суттю хоче закінчених історій.",
199
+ "val_random_example_3_hyp": "Людина конечна і за своєю суті хоче закінчених історій.",
200
  "val_random_example_4_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2548.wav",
201
  "val_random_example_4_speaker": "",
202
  "val_random_example_4_ref": "Едгар Дега всесвітньо відомий художник, який зображав на полотнах українок.",
203
+ "val_random_example_4_hyp": "Едгард Даяга всесвітньо відомий художник, який зображав на полотнах українок.",
204
+ "val/loss": 0.3482800841615762,
205
+ "val/cer": 0.04063628037282474,
206
+ "val/wer": 0.2002018694869882,
207
+ "val/main_ctc_loss": 0.3482800841615762,
208
  "val/aed_loss": 0.0,
209
  "val/liberta_distill_loss": 0.0,
210
  "val/audio_teacher_loss": 0.0,
211
+ "val/avg_blank_probability": 0.7947598638265351,
212
+ "val/argmax_blank_fraction": 0.7999461056521697,
213
+ "val/avg_top_nonblank_probability": 0.19575466599673175,
214
  "val/avg_output_frames": 129.72159800249688,
215
  "val/avg_target_tokens": 24.885455680399502,
216
  "val/target_tokens_per_frame": 0.1918374123013849,
217
  "val/impossible_sample_fraction": 0.0,
218
  "val/tight_sample_fraction": 0.0,
219
  "val/samples_short": 1183.0,
220
+ "val/wer_short": 0.2436127297176154,
221
+ "val/cer_short": 0.05026082052728042,
222
  "val/samples_medium": 2021.0,
223
+ "val/wer_medium": 0.18963165075034105,
224
+ "val/cer_medium": 0.03836291584801359,
225
  "val/samples_long": 0.0,
226
  "val/decoded_empty_fraction": 0.0,
227
+ "val/decoded_avg_char_length": 46.30867665418227,
228
+ "val/decoded_avg_word_length": 7.063670411985019,
229
  "val/speaker_count": 0.0,
230
  "val/speaker_macro_wer": 0.0,
231
  "val/speaker_id_available": 0.0,
232
  "val/missing_speaker_id_samples": 3204.0,
233
+ "val/forward_seconds": 332.3660257119918,
234
  "val/teacher_seconds": 0.0,
235
+ "val/decode_seconds": 1.0858283494599164,
236
+ "val/gather_seconds": 0.020539424964226782,
237
+ "train/loss": 0.5716550064133741,
238
+ "train/main_ctc_loss": 0.5716550064133741,
239
  "train/aed_loss": 0.0,
240
  "train/liberta_distill_loss": 0.0,
241
  "train/audio_teacher_loss": 0.0
checkpoint_best.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92a43354302411c5ec2b7386276bb372142eda3387198be09d30c3609124ebfd
3
  size 2424913224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b862d8aa342d1c5ed58a8edefdeb676506776c4a28eb13d6d560514d8eaee09f
3
  size 2424913224
checkpoint_step_last.json CHANGED
@@ -117,125 +117,125 @@
117
  "padding_value": 1.0
118
  },
119
  "epoch": 3,
120
- "global_step": 50000,
121
- "best_val_wer": 0.20322991179181113,
122
  "metrics": {
123
  "epoch": 3,
124
- "global_step": 50000,
125
- "train_loss": 0.5760795246486672,
126
- "train_main_ctc_loss": 0.5760795246486672,
127
  "train_aed_loss": 0.0,
128
  "train_liberta_distill_loss": 0.0,
129
  "train_audio_teacher_loss": 0.0,
130
- "val_loss": 0.33544471835922196,
131
- "val_cer": 0.040319756478638,
132
- "val_wer": 0.20322991179181113,
133
- "val_forward_seconds": 342.28896363277454,
134
  "val_teacher_seconds": 0.0,
135
- "val_decode_seconds": 1.0826134745730087,
136
- "val_gather_seconds": 0.02164362499024719,
137
  "val_model_source": "raw",
138
- "val_main_ctc_loss": 0.33544471835922196,
139
  "val_aed_loss": 0.0,
140
  "val_liberta_distill_loss": 0.0,
141
  "val_audio_teacher_loss": 0.0,
142
- "val_avg_blank_probability": 0.7951695728016561,
143
- "val_argmax_blank_fraction": 0.8015845900661168,
144
- "val_avg_top_nonblank_probability": 0.19574899347442457,
145
  "val_avg_output_frames": 129.72159800249688,
146
  "val_avg_target_tokens": 24.885455680399502,
147
  "val_target_tokens_per_frame": 0.1918374123013849,
148
  "val_impossible_sample_fraction": 0.0,
149
  "val_tight_sample_fraction": 0.0,
150
  "val_samples_short": 1183.0,
151
- "val_wer_short": 0.25123263110712685,
152
- "val_cer_short": 0.04994360637247991,
153
  "val_samples_medium": 2021.0,
154
- "val_wer_medium": 0.19154160982264665,
155
- "val_cer_medium": 0.038046554996836394,
156
  "val_samples_long": 0.0,
157
  "val_decoded_empty_fraction": 0.0,
158
- "val_decoded_avg_char_length": 46.21129837702871,
159
- "val_decoded_avg_word_length": 7.056179775280899,
160
  "val_speaker_count": 0.0,
161
  "val_speaker_macro_wer": 0.0,
162
  "val_speaker_id_available": 0.0,
163
  "val_missing_speaker_id_samples": 3204.0,
164
- "val_hardest_example_0_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3180.wav",
165
  "val_hardest_example_0_speaker": "",
166
- "val_hardest_example_0_ref": "Яка?",
167
- "val_hardest_example_0_hyp": "Яке?",
168
- "val_hardest_example_1_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3561.wav",
169
  "val_hardest_example_1_speaker": "",
170
- "val_hardest_example_1_ref": "Двадцять три?",
171
- "val_hardest_example_1_hyp": "Двадцять три.",
172
- "val_hardest_example_2_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/814.wav",
173
  "val_hardest_example_2_speaker": "",
174
- "val_hardest_example_2_ref": "Піди до Ждана.",
175
- "val_hardest_example_2_hyp": "Піде до ждана.",
176
- "val_hardest_example_3_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2958.wav",
177
  "val_hardest_example_3_speaker": "",
178
- "val_hardest_example_3_ref": "Ходімо?",
179
- "val_hardest_example_3_hyp": "Ходімо.",
180
- "val_hardest_example_4_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3273.wav",
181
  "val_hardest_example_4_speaker": "",
182
- "val_hardest_example_4_ref": "І за мене часто б'ються.",
183
- "val_hardest_example_4_hyp": "Й за мене часто б'ються.",
184
- "val_random_example_0_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3097.wav",
185
  "val_random_example_0_speaker": "",
186
- "val_random_example_0_ref": "Ніхто про це нині не хоче думати.",
187
- "val_random_example_0_hyp": "Ніхто про це нині не хоче думати.",
188
- "val_random_example_1_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2797.wav",
189
  "val_random_example_1_speaker": "",
190
- "val_random_example_1_ref": "Старий почав крутити головою.",
191
- "val_random_example_1_hyp": "Старий почав крутити головою.",
192
- "val_random_example_2_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/953.wav",
193
  "val_random_example_2_speaker": "",
194
- "val_random_example_2_ref": "А що сталося, Андрію?",
195
- "val_random_example_2_hyp": "А що сталося, Андрію?",
196
- "val_random_example_3_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3112.wav",
197
  "val_random_example_3_speaker": "",
198
- "val_random_example_3_ref": "Застосування мов в Україні гарантується Конституцією України та визначається законом.",
199
- "val_random_example_3_hyp": "Застосування мов в Україні гарантується конституцію України та визначається законом.",
200
  "val_random_example_4_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2548.wav",
201
  "val_random_example_4_speaker": "",
202
  "val_random_example_4_ref": "Едгар Дега всесвітньо відомий художник, який зображав на полотнах українок.",
203
- "val_random_example_4_hyp": "Едгардега всесвітньо відомий художник, який зображав на полотнах українок.",
204
- "val/loss": 0.33544471835922196,
205
- "val/cer": 0.040319756478638,
206
- "val/wer": 0.20322991179181113,
207
- "val/main_ctc_loss": 0.33544471835922196,
208
  "val/aed_loss": 0.0,
209
  "val/liberta_distill_loss": 0.0,
210
  "val/audio_teacher_loss": 0.0,
211
- "val/avg_blank_probability": 0.7951695728016561,
212
- "val/argmax_blank_fraction": 0.8015845900661168,
213
- "val/avg_top_nonblank_probability": 0.19574899347442457,
214
  "val/avg_output_frames": 129.72159800249688,
215
  "val/avg_target_tokens": 24.885455680399502,
216
  "val/target_tokens_per_frame": 0.1918374123013849,
217
  "val/impossible_sample_fraction": 0.0,
218
  "val/tight_sample_fraction": 0.0,
219
  "val/samples_short": 1183.0,
220
- "val/wer_short": 0.25123263110712685,
221
- "val/cer_short": 0.04994360637247991,
222
  "val/samples_medium": 2021.0,
223
- "val/wer_medium": 0.19154160982264665,
224
- "val/cer_medium": 0.038046554996836394,
225
  "val/samples_long": 0.0,
226
  "val/decoded_empty_fraction": 0.0,
227
- "val/decoded_avg_char_length": 46.21129837702871,
228
- "val/decoded_avg_word_length": 7.056179775280899,
229
  "val/speaker_count": 0.0,
230
  "val/speaker_macro_wer": 0.0,
231
  "val/speaker_id_available": 0.0,
232
  "val/missing_speaker_id_samples": 3204.0,
233
- "val/forward_seconds": 342.28896363277454,
234
  "val/teacher_seconds": 0.0,
235
- "val/decode_seconds": 1.0826134745730087,
236
- "val/gather_seconds": 0.02164362499024719,
237
- "train/loss": 0.5760795246486672,
238
- "train/main_ctc_loss": 0.5760795246486672,
239
  "train/aed_loss": 0.0,
240
  "train/liberta_distill_loss": 0.0,
241
  "train/audio_teacher_loss": 0.0
 
117
  "padding_value": 1.0
118
  },
119
  "epoch": 3,
120
+ "global_step": 60000,
121
+ "best_val_wer": 0.2002018694869882,
122
  "metrics": {
123
  "epoch": 3,
124
+ "global_step": 60000,
125
+ "train_loss": 0.5716550064133741,
126
+ "train_main_ctc_loss": 0.5716550064133741,
127
  "train_aed_loss": 0.0,
128
  "train_liberta_distill_loss": 0.0,
129
  "train_audio_teacher_loss": 0.0,
130
+ "val_loss": 0.3482800841615762,
131
+ "val_cer": 0.04063628037282474,
132
+ "val_wer": 0.2002018694869882,
133
+ "val_forward_seconds": 332.3660257119918,
134
  "val_teacher_seconds": 0.0,
135
+ "val_decode_seconds": 1.0858283494599164,
136
+ "val_gather_seconds": 0.020539424964226782,
137
  "val_model_source": "raw",
138
+ "val_main_ctc_loss": 0.3482800841615762,
139
  "val_aed_loss": 0.0,
140
  "val_liberta_distill_loss": 0.0,
141
  "val_audio_teacher_loss": 0.0,
142
+ "val_avg_blank_probability": 0.7947598638265351,
143
+ "val_argmax_blank_fraction": 0.7999461056521697,
144
+ "val_avg_top_nonblank_probability": 0.19575466599673175,
145
  "val_avg_output_frames": 129.72159800249688,
146
  "val_avg_target_tokens": 24.885455680399502,
147
  "val_target_tokens_per_frame": 0.1918374123013849,
148
  "val_impossible_sample_fraction": 0.0,
149
  "val_tight_sample_fraction": 0.0,
150
  "val_samples_short": 1183.0,
151
+ "val_wer_short": 0.2436127297176154,
152
+ "val_cer_short": 0.05026082052728042,
153
  "val_samples_medium": 2021.0,
154
+ "val_wer_medium": 0.18963165075034105,
155
+ "val_cer_medium": 0.03836291584801359,
156
  "val_samples_long": 0.0,
157
  "val_decoded_empty_fraction": 0.0,
158
+ "val_decoded_avg_char_length": 46.30867665418227,
159
+ "val_decoded_avg_word_length": 7.063670411985019,
160
  "val_speaker_count": 0.0,
161
  "val_speaker_macro_wer": 0.0,
162
  "val_speaker_id_available": 0.0,
163
  "val_missing_speaker_id_samples": 3204.0,
164
+ "val_hardest_example_0_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2395.wav",
165
  "val_hardest_example_0_speaker": "",
166
+ "val_hardest_example_0_ref": "Я прошу секунду уваги.",
167
+ "val_hardest_example_0_hyp": "Я прошу складду уваги.",
168
+ "val_hardest_example_1_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/662.wav",
169
  "val_hardest_example_1_speaker": "",
170
+ "val_hardest_example_1_ref": "Тире?",
171
+ "val_hardest_example_1_hyp": "Тира.",
172
+ "val_hardest_example_2_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/543.wav",
173
  "val_hardest_example_2_speaker": "",
174
+ "val_hardest_example_2_ref": "А ви спробуйте.",
175
+ "val_hardest_example_2_hyp": "А ви спробуйте?",
176
+ "val_hardest_example_3_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3426.wav",
177
  "val_hardest_example_3_speaker": "",
178
+ "val_hardest_example_3_ref": "Украли щось?",
179
+ "val_hardest_example_3_hyp": "Украли щось.",
180
+ "val_hardest_example_4_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2465.wav",
181
  "val_hardest_example_4_speaker": "",
182
+ "val_hardest_example_4_ref": "Але ви?",
183
+ "val_hardest_example_4_hyp": "Але ви.",
184
+ "val_random_example_0_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/592.wav",
185
  "val_random_example_0_speaker": "",
186
+ "val_random_example_0_ref": "Але не будеш і Гатиловою, відповів їй Валтарій Аквітанський.",
187
+ "val_random_example_0_hyp": "Але не будеш І Гатиловою, відповів її Валтарій Аквітанський",
188
+ "val_random_example_1_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3190.wav",
189
  "val_random_example_1_speaker": "",
190
+ "val_random_example_1_ref": "А ти, княже?",
191
+ "val_random_example_1_hyp": "А ти, княже.",
192
+ "val_random_example_2_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/1073.wav",
193
  "val_random_example_2_speaker": "",
194
+ "val_random_example_2_ref": "Так, як і була.",
195
+ "val_random_example_2_hyp": "Так, як і була.",
196
+ "val_random_example_3_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2130.wav",
197
  "val_random_example_3_speaker": "",
198
+ "val_random_example_3_ref": "Людина конечна і за своєю суттю хоче закінчених історій.",
199
+ "val_random_example_3_hyp": "Людина конечна і за своєю суті хоче закінчених історій.",
200
  "val_random_example_4_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2548.wav",
201
  "val_random_example_4_speaker": "",
202
  "val_random_example_4_ref": "Едгар Дега всесвітньо відомий художник, який зображав на полотнах українок.",
203
+ "val_random_example_4_hyp": "Едгард Даяга всесвітньо відомий художник, який зображав на полотнах українок.",
204
+ "val/loss": 0.3482800841615762,
205
+ "val/cer": 0.04063628037282474,
206
+ "val/wer": 0.2002018694869882,
207
+ "val/main_ctc_loss": 0.3482800841615762,
208
  "val/aed_loss": 0.0,
209
  "val/liberta_distill_loss": 0.0,
210
  "val/audio_teacher_loss": 0.0,
211
+ "val/avg_blank_probability": 0.7947598638265351,
212
+ "val/argmax_blank_fraction": 0.7999461056521697,
213
+ "val/avg_top_nonblank_probability": 0.19575466599673175,
214
  "val/avg_output_frames": 129.72159800249688,
215
  "val/avg_target_tokens": 24.885455680399502,
216
  "val/target_tokens_per_frame": 0.1918374123013849,
217
  "val/impossible_sample_fraction": 0.0,
218
  "val/tight_sample_fraction": 0.0,
219
  "val/samples_short": 1183.0,
220
+ "val/wer_short": 0.2436127297176154,
221
+ "val/cer_short": 0.05026082052728042,
222
  "val/samples_medium": 2021.0,
223
+ "val/wer_medium": 0.18963165075034105,
224
+ "val/cer_medium": 0.03836291584801359,
225
  "val/samples_long": 0.0,
226
  "val/decoded_empty_fraction": 0.0,
227
+ "val/decoded_avg_char_length": 46.30867665418227,
228
+ "val/decoded_avg_word_length": 7.063670411985019,
229
  "val/speaker_count": 0.0,
230
  "val/speaker_macro_wer": 0.0,
231
  "val/speaker_id_available": 0.0,
232
  "val/missing_speaker_id_samples": 3204.0,
233
+ "val/forward_seconds": 332.3660257119918,
234
  "val/teacher_seconds": 0.0,
235
+ "val/decode_seconds": 1.0858283494599164,
236
+ "val/gather_seconds": 0.020539424964226782,
237
+ "train/loss": 0.5716550064133741,
238
+ "train/main_ctc_loss": 0.5716550064133741,
239
  "train/aed_loss": 0.0,
240
  "train/liberta_distill_loss": 0.0,
241
  "train/audio_teacher_loss": 0.0
checkpoint_step_last.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92a43354302411c5ec2b7386276bb372142eda3387198be09d30c3609124ebfd
3
  size 2424913224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b862d8aa342d1c5ed58a8edefdeb676506776c4a28eb13d6d560514d8eaee09f
3
  size 2424913224
checkpoint_topk_avg.json CHANGED
@@ -117,125 +117,125 @@
117
  "padding_value": 1.0
118
  },
119
  "epoch": 3,
120
- "global_step": 50000,
121
- "best_val_wer": 0.20322991179181113,
122
  "metrics": {
123
  "epoch": 3,
124
- "global_step": 50000,
125
- "train_loss": 0.5760795246486672,
126
- "train_main_ctc_loss": 0.5760795246486672,
127
  "train_aed_loss": 0.0,
128
  "train_liberta_distill_loss": 0.0,
129
  "train_audio_teacher_loss": 0.0,
130
- "val_loss": 0.33544471835922196,
131
- "val_cer": 0.040319756478638,
132
- "val_wer": 0.20322991179181113,
133
- "val_forward_seconds": 342.28896363277454,
134
  "val_teacher_seconds": 0.0,
135
- "val_decode_seconds": 1.0826134745730087,
136
- "val_gather_seconds": 0.02164362499024719,
137
  "val_model_source": "raw",
138
- "val_main_ctc_loss": 0.33544471835922196,
139
  "val_aed_loss": 0.0,
140
  "val_liberta_distill_loss": 0.0,
141
  "val_audio_teacher_loss": 0.0,
142
- "val_avg_blank_probability": 0.7951695728016561,
143
- "val_argmax_blank_fraction": 0.8015845900661168,
144
- "val_avg_top_nonblank_probability": 0.19574899347442457,
145
  "val_avg_output_frames": 129.72159800249688,
146
  "val_avg_target_tokens": 24.885455680399502,
147
  "val_target_tokens_per_frame": 0.1918374123013849,
148
  "val_impossible_sample_fraction": 0.0,
149
  "val_tight_sample_fraction": 0.0,
150
  "val_samples_short": 1183.0,
151
- "val_wer_short": 0.25123263110712685,
152
- "val_cer_short": 0.04994360637247991,
153
  "val_samples_medium": 2021.0,
154
- "val_wer_medium": 0.19154160982264665,
155
- "val_cer_medium": 0.038046554996836394,
156
  "val_samples_long": 0.0,
157
  "val_decoded_empty_fraction": 0.0,
158
- "val_decoded_avg_char_length": 46.21129837702871,
159
- "val_decoded_avg_word_length": 7.056179775280899,
160
  "val_speaker_count": 0.0,
161
  "val_speaker_macro_wer": 0.0,
162
  "val_speaker_id_available": 0.0,
163
  "val_missing_speaker_id_samples": 3204.0,
164
- "val_hardest_example_0_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3180.wav",
165
  "val_hardest_example_0_speaker": "",
166
- "val_hardest_example_0_ref": "Яка?",
167
- "val_hardest_example_0_hyp": "Яке?",
168
- "val_hardest_example_1_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3561.wav",
169
  "val_hardest_example_1_speaker": "",
170
- "val_hardest_example_1_ref": "Двадцять три?",
171
- "val_hardest_example_1_hyp": "Двадцять три.",
172
- "val_hardest_example_2_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/814.wav",
173
  "val_hardest_example_2_speaker": "",
174
- "val_hardest_example_2_ref": "Піди до Ждана.",
175
- "val_hardest_example_2_hyp": "Піде до ждана.",
176
- "val_hardest_example_3_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2958.wav",
177
  "val_hardest_example_3_speaker": "",
178
- "val_hardest_example_3_ref": "Ходімо?",
179
- "val_hardest_example_3_hyp": "Ходімо.",
180
- "val_hardest_example_4_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3273.wav",
181
  "val_hardest_example_4_speaker": "",
182
- "val_hardest_example_4_ref": "І за мене часто б'ються.",
183
- "val_hardest_example_4_hyp": "Й за мене часто б'ються.",
184
- "val_random_example_0_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3097.wav",
185
  "val_random_example_0_speaker": "",
186
- "val_random_example_0_ref": "Ніхто про це нині не хоче думати.",
187
- "val_random_example_0_hyp": "Ніхто про це нині не хоче думати.",
188
- "val_random_example_1_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2797.wav",
189
  "val_random_example_1_speaker": "",
190
- "val_random_example_1_ref": "Старий почав крутити головою.",
191
- "val_random_example_1_hyp": "Старий почав крутити головою.",
192
- "val_random_example_2_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/953.wav",
193
  "val_random_example_2_speaker": "",
194
- "val_random_example_2_ref": "А що сталося, Андрію?",
195
- "val_random_example_2_hyp": "А що сталося, Андрію?",
196
- "val_random_example_3_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3112.wav",
197
  "val_random_example_3_speaker": "",
198
- "val_random_example_3_ref": "Застосування мов в Україні гарантується Конституцією України та визначається законом.",
199
- "val_random_example_3_hyp": "Застосування мов в Україні гарантується конституцію України та визначається законом.",
200
  "val_random_example_4_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2548.wav",
201
  "val_random_example_4_speaker": "",
202
  "val_random_example_4_ref": "Едгар Дега всесвітньо відомий художник, який зображав на полотнах українок.",
203
- "val_random_example_4_hyp": "Едгардега всесвітньо відомий художник, який зображав на полотнах українок.",
204
- "val/loss": 0.33544471835922196,
205
- "val/cer": 0.040319756478638,
206
- "val/wer": 0.20322991179181113,
207
- "val/main_ctc_loss": 0.33544471835922196,
208
  "val/aed_loss": 0.0,
209
  "val/liberta_distill_loss": 0.0,
210
  "val/audio_teacher_loss": 0.0,
211
- "val/avg_blank_probability": 0.7951695728016561,
212
- "val/argmax_blank_fraction": 0.8015845900661168,
213
- "val/avg_top_nonblank_probability": 0.19574899347442457,
214
  "val/avg_output_frames": 129.72159800249688,
215
  "val/avg_target_tokens": 24.885455680399502,
216
  "val/target_tokens_per_frame": 0.1918374123013849,
217
  "val/impossible_sample_fraction": 0.0,
218
  "val/tight_sample_fraction": 0.0,
219
  "val/samples_short": 1183.0,
220
- "val/wer_short": 0.25123263110712685,
221
- "val/cer_short": 0.04994360637247991,
222
  "val/samples_medium": 2021.0,
223
- "val/wer_medium": 0.19154160982264665,
224
- "val/cer_medium": 0.038046554996836394,
225
  "val/samples_long": 0.0,
226
  "val/decoded_empty_fraction": 0.0,
227
- "val/decoded_avg_char_length": 46.21129837702871,
228
- "val/decoded_avg_word_length": 7.056179775280899,
229
  "val/speaker_count": 0.0,
230
  "val/speaker_macro_wer": 0.0,
231
  "val/speaker_id_available": 0.0,
232
  "val/missing_speaker_id_samples": 3204.0,
233
- "val/forward_seconds": 342.28896363277454,
234
  "val/teacher_seconds": 0.0,
235
- "val/decode_seconds": 1.0826134745730087,
236
- "val/gather_seconds": 0.02164362499024719,
237
- "train/loss": 0.5760795246486672,
238
- "train/main_ctc_loss": 0.5760795246486672,
239
  "train/aed_loss": 0.0,
240
  "train/liberta_distill_loss": 0.0,
241
  "train/audio_teacher_loss": 0.0
@@ -419,6 +419,12 @@
419
  "w2v_bert_model_source": "/home/yehor/squeezeformer/w2v-bert-uk-v2.1"
420
  },
421
  "averaged_from": [
 
 
 
 
 
 
422
  {
423
  "epoch": 3,
424
  "global_step": 50000,
@@ -442,12 +448,6 @@
442
  "global_step": 17000,
443
  "val_wer": 0.25163470399789356,
444
  "path": "checkpoint_epoch=0002_step=00017000_valwer=0.251635.pt"
445
- },
446
- {
447
- "epoch": 2,
448
- "global_step": 15500,
449
- "val_wer": 0.255847632421995,
450
- "path": "checkpoint_epoch=0002_step=00015500_valwer=0.255848.pt"
451
  }
452
  ]
453
  }
 
117
  "padding_value": 1.0
118
  },
119
  "epoch": 3,
120
+ "global_step": 60000,
121
+ "best_val_wer": 0.2002018694869882,
122
  "metrics": {
123
  "epoch": 3,
124
+ "global_step": 60000,
125
+ "train_loss": 0.5716550064133741,
126
+ "train_main_ctc_loss": 0.5716550064133741,
127
  "train_aed_loss": 0.0,
128
  "train_liberta_distill_loss": 0.0,
129
  "train_audio_teacher_loss": 0.0,
130
+ "val_loss": 0.3482800841615762,
131
+ "val_cer": 0.04063628037282474,
132
+ "val_wer": 0.2002018694869882,
133
+ "val_forward_seconds": 332.3660257119918,
134
  "val_teacher_seconds": 0.0,
135
+ "val_decode_seconds": 1.0858283494599164,
136
+ "val_gather_seconds": 0.020539424964226782,
137
  "val_model_source": "raw",
138
+ "val_main_ctc_loss": 0.3482800841615762,
139
  "val_aed_loss": 0.0,
140
  "val_liberta_distill_loss": 0.0,
141
  "val_audio_teacher_loss": 0.0,
142
+ "val_avg_blank_probability": 0.7947598638265351,
143
+ "val_argmax_blank_fraction": 0.7999461056521697,
144
+ "val_avg_top_nonblank_probability": 0.19575466599673175,
145
  "val_avg_output_frames": 129.72159800249688,
146
  "val_avg_target_tokens": 24.885455680399502,
147
  "val_target_tokens_per_frame": 0.1918374123013849,
148
  "val_impossible_sample_fraction": 0.0,
149
  "val_tight_sample_fraction": 0.0,
150
  "val_samples_short": 1183.0,
151
+ "val_wer_short": 0.2436127297176154,
152
+ "val_cer_short": 0.05026082052728042,
153
  "val_samples_medium": 2021.0,
154
+ "val_wer_medium": 0.18963165075034105,
155
+ "val_cer_medium": 0.03836291584801359,
156
  "val_samples_long": 0.0,
157
  "val_decoded_empty_fraction": 0.0,
158
+ "val_decoded_avg_char_length": 46.30867665418227,
159
+ "val_decoded_avg_word_length": 7.063670411985019,
160
  "val_speaker_count": 0.0,
161
  "val_speaker_macro_wer": 0.0,
162
  "val_speaker_id_available": 0.0,
163
  "val_missing_speaker_id_samples": 3204.0,
164
+ "val_hardest_example_0_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2395.wav",
165
  "val_hardest_example_0_speaker": "",
166
+ "val_hardest_example_0_ref": "Я прошу секунду уваги.",
167
+ "val_hardest_example_0_hyp": "Я прошу складду уваги.",
168
+ "val_hardest_example_1_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/662.wav",
169
  "val_hardest_example_1_speaker": "",
170
+ "val_hardest_example_1_ref": "Тире?",
171
+ "val_hardest_example_1_hyp": "Тира.",
172
+ "val_hardest_example_2_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/543.wav",
173
  "val_hardest_example_2_speaker": "",
174
+ "val_hardest_example_2_ref": "А ви спробуйте.",
175
+ "val_hardest_example_2_hyp": "А ви спробуйте?",
176
+ "val_hardest_example_3_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3426.wav",
177
  "val_hardest_example_3_speaker": "",
178
+ "val_hardest_example_3_ref": "Украли щось?",
179
+ "val_hardest_example_3_hyp": "Украли щось.",
180
+ "val_hardest_example_4_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2465.wav",
181
  "val_hardest_example_4_speaker": "",
182
+ "val_hardest_example_4_ref": "Але ви?",
183
+ "val_hardest_example_4_hyp": "Але ви.",
184
+ "val_random_example_0_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/592.wav",
185
  "val_random_example_0_speaker": "",
186
+ "val_random_example_0_ref": "Але не будеш і Гатиловою, відповів їй Валтарій Аквітанський.",
187
+ "val_random_example_0_hyp": "Але не будеш І Гатиловою, відповів її Валтарій Аквітанський",
188
+ "val_random_example_1_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3190.wav",
189
  "val_random_example_1_speaker": "",
190
+ "val_random_example_1_ref": "А ти, княже?",
191
+ "val_random_example_1_hyp": "А ти, княже.",
192
+ "val_random_example_2_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/1073.wav",
193
  "val_random_example_2_speaker": "",
194
+ "val_random_example_2_ref": "Так, як і була.",
195
+ "val_random_example_2_hyp": "Так, як і була.",
196
+ "val_random_example_3_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2130.wav",
197
  "val_random_example_3_speaker": "",
198
+ "val_random_example_3_ref": "Людина конечна і за своєю суттю хоче закінчених історій.",
199
+ "val_random_example_3_hyp": "Людина конечна і за своєю суті хоче закінчених історій.",
200
  "val_random_example_4_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2548.wav",
201
  "val_random_example_4_speaker": "",
202
  "val_random_example_4_ref": "Едгар Дега всесвітньо відомий художник, який зображав на полотнах українок.",
203
+ "val_random_example_4_hyp": "Едгард Даяга всесвітньо відомий художник, який зображав на полотнах українок.",
204
+ "val/loss": 0.3482800841615762,
205
+ "val/cer": 0.04063628037282474,
206
+ "val/wer": 0.2002018694869882,
207
+ "val/main_ctc_loss": 0.3482800841615762,
208
  "val/aed_loss": 0.0,
209
  "val/liberta_distill_loss": 0.0,
210
  "val/audio_teacher_loss": 0.0,
211
+ "val/avg_blank_probability": 0.7947598638265351,
212
+ "val/argmax_blank_fraction": 0.7999461056521697,
213
+ "val/avg_top_nonblank_probability": 0.19575466599673175,
214
  "val/avg_output_frames": 129.72159800249688,
215
  "val/avg_target_tokens": 24.885455680399502,
216
  "val/target_tokens_per_frame": 0.1918374123013849,
217
  "val/impossible_sample_fraction": 0.0,
218
  "val/tight_sample_fraction": 0.0,
219
  "val/samples_short": 1183.0,
220
+ "val/wer_short": 0.2436127297176154,
221
+ "val/cer_short": 0.05026082052728042,
222
  "val/samples_medium": 2021.0,
223
+ "val/wer_medium": 0.18963165075034105,
224
+ "val/cer_medium": 0.03836291584801359,
225
  "val/samples_long": 0.0,
226
  "val/decoded_empty_fraction": 0.0,
227
+ "val/decoded_avg_char_length": 46.30867665418227,
228
+ "val/decoded_avg_word_length": 7.063670411985019,
229
  "val/speaker_count": 0.0,
230
  "val/speaker_macro_wer": 0.0,
231
  "val/speaker_id_available": 0.0,
232
  "val/missing_speaker_id_samples": 3204.0,
233
+ "val/forward_seconds": 332.3660257119918,
234
  "val/teacher_seconds": 0.0,
235
+ "val/decode_seconds": 1.0858283494599164,
236
+ "val/gather_seconds": 0.020539424964226782,
237
+ "train/loss": 0.5716550064133741,
238
+ "train/main_ctc_loss": 0.5716550064133741,
239
  "train/aed_loss": 0.0,
240
  "train/liberta_distill_loss": 0.0,
241
  "train/audio_teacher_loss": 0.0
 
419
  "w2v_bert_model_source": "/home/yehor/squeezeformer/w2v-bert-uk-v2.1"
420
  },
421
  "averaged_from": [
422
+ {
423
+ "epoch": 3,
424
+ "global_step": 60000,
425
+ "val_wer": 0.2002018694869882,
426
+ "path": "checkpoint_epoch=0003_step=00060000_valwer=0.200202.pt"
427
+ },
428
  {
429
  "epoch": 3,
430
  "global_step": 50000,
 
448
  "global_step": 17000,
449
  "val_wer": 0.25163470399789356,
450
  "path": "checkpoint_epoch=0002_step=00017000_valwer=0.251635.pt"
 
 
 
 
 
 
451
  }
452
  ]
453
  }
checkpoint_topk_avg.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45cf9e0c84bb28753c97817fdf4603e09b433adc894aec0fd4d86af950c6a3bc
3
  size 2424913224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22c91e8f3726832938b191ff9fdfb62524aa38632b6a7a1c54bdbc2b9cf42020
3
  size 2424913224
checkpoints_topk/metadata.json CHANGED
@@ -1,6 +1,12 @@
1
  {
2
  "compatibility_signature": "ecb1c79a9364dfe2fac014f491da9e093b8dc939f19e5994004884370168b512",
3
  "items": [
 
 
 
 
 
 
4
  {
5
  "epoch": 3,
6
  "global_step": 50000,
@@ -24,12 +30,6 @@
24
  "global_step": 17000,
25
  "val_wer": 0.25163470399789356,
26
  "path": "checkpoint_epoch=0002_step=00017000_valwer=0.251635.pt"
27
- },
28
- {
29
- "epoch": 2,
30
- "global_step": 15500,
31
- "val_wer": 0.255847632421995,
32
- "path": "checkpoint_epoch=0002_step=00015500_valwer=0.255848.pt"
33
  }
34
  ]
35
  }
 
1
  {
2
  "compatibility_signature": "ecb1c79a9364dfe2fac014f491da9e093b8dc939f19e5994004884370168b512",
3
  "items": [
4
+ {
5
+ "epoch": 3,
6
+ "global_step": 60000,
7
+ "val_wer": 0.2002018694869882,
8
+ "path": "checkpoint_epoch=0003_step=00060000_valwer=0.200202.pt"
9
+ },
10
  {
11
  "epoch": 3,
12
  "global_step": 50000,
 
30
  "global_step": 17000,
31
  "val_wer": 0.25163470399789356,
32
  "path": "checkpoint_epoch=0002_step=00017000_valwer=0.251635.pt"
 
 
 
 
 
 
33
  }
34
  ]
35
  }
eval_reports/epoch_0003_step_00060000.json ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3,
3
+ "global_step": 60000,
4
+ "model_source": "raw",
5
+ "metrics": {
6
+ "loss": 0.3482800841615762,
7
+ "main_ctc_loss": 0.3482800841615762,
8
+ "aed_loss": 0.0,
9
+ "liberta_distill_loss": 0.0,
10
+ "audio_teacher_loss": 0.0,
11
+ "cer": 0.04063628037282474,
12
+ "wer": 0.2002018694869882,
13
+ "avg_blank_probability": 0.7947598638265351,
14
+ "argmax_blank_fraction": 0.7999461056521697,
15
+ "avg_top_nonblank_probability": 0.19575466599673175,
16
+ "avg_output_frames": 129.72159800249688,
17
+ "avg_target_tokens": 24.885455680399502,
18
+ "target_tokens_per_frame": 0.1918374123013849,
19
+ "impossible_sample_fraction": 0.0,
20
+ "tight_sample_fraction": 0.0,
21
+ "samples_short": 1183.0,
22
+ "wer_short": 0.2436127297176154,
23
+ "cer_short": 0.05026082052728042,
24
+ "samples_medium": 2021.0,
25
+ "wer_medium": 0.18963165075034105,
26
+ "cer_medium": 0.03836291584801359,
27
+ "samples_long": 0.0,
28
+ "decoded_empty_fraction": 0.0,
29
+ "decoded_avg_char_length": 46.30867665418227,
30
+ "decoded_avg_word_length": 7.063670411985019,
31
+ "speaker_count": 0.0,
32
+ "speaker_macro_wer": 0.0,
33
+ "speaker_id_available": 0.0,
34
+ "missing_speaker_id_samples": 3204.0
35
+ },
36
+ "hardest_examples": [
37
+ {
38
+ "utterance_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2395.wav",
39
+ "speaker_id": "",
40
+ "reference": "Я прошу секунду уваги.",
41
+ "hypothesis": "Я прошу складду уваги."
42
+ },
43
+ {
44
+ "utterance_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/662.wav",
45
+ "speaker_id": "",
46
+ "reference": "Тире?",
47
+ "hypothesis": "Тира."
48
+ },
49
+ {
50
+ "utterance_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/543.wav",
51
+ "speaker_id": "",
52
+ "reference": "А ви спробуйте.",
53
+ "hypothesis": "А ви спробуйте?"
54
+ },
55
+ {
56
+ "utterance_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3426.wav",
57
+ "speaker_id": "",
58
+ "reference": "Украли щось?",
59
+ "hypothesis": "Украли щось."
60
+ },
61
+ {
62
+ "utterance_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2465.wav",
63
+ "speaker_id": "",
64
+ "reference": "Але ви?",
65
+ "hypothesis": "Але ви."
66
+ }
67
+ ],
68
+ "random_examples": [
69
+ {
70
+ "utterance_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/592.wav",
71
+ "speaker_id": "",
72
+ "reference": "Але не будеш і Гатиловою, відповів їй Валтарій Аквітанський.",
73
+ "hypothesis": "Але не будеш І Гатиловою, відповів її Валтарій Аквітанський"
74
+ },
75
+ {
76
+ "utterance_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/3190.wav",
77
+ "speaker_id": "",
78
+ "reference": "А ти, княже?",
79
+ "hypothesis": "А ти, княже."
80
+ },
81
+ {
82
+ "utterance_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/1073.wav",
83
+ "speaker_id": "",
84
+ "reference": "Так, як і була.",
85
+ "hypothesis": "Так, як і була."
86
+ },
87
+ {
88
+ "utterance_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2130.wav",
89
+ "speaker_id": "",
90
+ "reference": "Людина конечна і за своєю суттю хоче закінчених історій.",
91
+ "hypothesis": "Людина конечна і за своєю суті хоче закінчених історій."
92
+ },
93
+ {
94
+ "utterance_id": "/home/yehor/squeezeformer/data/cv10-uk-testset-clean-punctuated/2548.wav",
95
+ "speaker_id": "",
96
+ "reference": "Едгар Дега всесвітньо відомий художник, який зображав на полотнах українок.",
97
+ "hypothesis": "Едгард Даяга всесвітньо відомий художник, який зображав на полотнах українок."
98
+ }
99
+ ],
100
+ "speaker_metrics": {
101
+ "speaker_count": 0,
102
+ "speaker_macro_wer": 0.0,
103
+ "speaker_id_available": false,
104
+ "missing_speaker_id_samples": 3204,
105
+ "per_speaker": {}
106
+ },
107
+ "split_audit": {
108
+ "counts": {
109
+ "train": {
110
+ "samples": 648240,
111
+ "speakers": 0,
112
+ "records_with_speaker_id": 0,
113
+ "hours": 1264.353836267957
114
+ },
115
+ "validation": {
116
+ "samples": 3204,
117
+ "speakers": 0,
118
+ "records_with_speaker_id": 0,
119
+ "hours": 4.629311440972232
120
+ }
121
+ },
122
+ "hours": {
123
+ "train": 1264.353836267957,
124
+ "validation": 4.629311440972232
125
+ },
126
+ "total_hours": 1268.9831477089292,
127
+ "speaker_overlaps": {
128
+ "train_vs_validation": 0
129
+ },
130
+ "speaker_balance_ratio": 1.0,
131
+ "speaker_id_available": false
132
+ }
133
+ }
training_20260415_090229.log CHANGED
The diff for this file is too large to render. See raw diff