Baselhany committed on
Commit
249b6d2
·
verified ·
1 Parent(s): 07e202e

Training in progress, step 2800, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3b114ec9aae5f56c896dc809331fe15ed8ccb9c22a63d8091cf763b49c85e3d
3
  size 223144592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07cc1e0dffcd80eac14f87d0d9619540d3f5cf1771ecd556cf377b27a876168c
3
  size 223144592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edcd52e16b677aaaa2c66be498a66202ce1cc335af39cbbad43ce8a2ad734b75
3
  size 281574266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4654516b018d8c1d8ee7f380300e2cfab5a3d33f9a966babd44885140fecda5d
3
  size 281574266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87f3e0f4d936e870e79e3be44a85542644bf76c70c4bbcaa7cf95399760c586a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9590931db28029d3072811ed01a2ef3a2eb67fd96a33277147916dc1a126b69f
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df198c0270152417dac5be430450a9752660b128bc2ee48e34c53ddc72b40e6e
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a378d0eab4cd862a8b75ae52dc72877722c788e485acf876520c8995544ef8d
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d75303d12b74496ad035c93f7f537a1c9eccd4f5cc765415d2bf1655e9477134
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0d22202ff71180e976c20ff3356dc9e86ce8ae5966c95daf7b725e761a03663
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 2400,
3
  "best_metric": 0.2371058060713233,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-2400",
5
- "epoch": 1.7097808658471405,
6
  "eval_steps": 400,
7
- "global_step": 2400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -230,6 +230,43 @@
230
  "eval_steps_per_second": 0.404,
231
  "eval_wer": 0.2371058060713233,
232
  "step": 2400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  }
234
  ],
235
  "logging_steps": 100,
@@ -249,7 +286,7 @@
249
  "attributes": {}
250
  }
251
  },
252
- "total_flos": 3.12079259271168e+18,
253
  "train_batch_size": 8,
254
  "trial_name": null,
255
  "trial_params": null
 
2
  "best_global_step": 2400,
3
  "best_metric": 0.2371058060713233,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-2400",
5
+ "epoch": 1.9948334224122573,
6
  "eval_steps": 400,
7
+ "global_step": 2800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
230
  "eval_steps_per_second": 0.404,
231
  "eval_wer": 0.2371058060713233,
232
  "step": 2400
233
+ },
234
+ {
235
+ "epoch": 1.7810440049884197,
236
+ "grad_norm": 44.21736145019531,
237
+ "learning_rate": 4.6481531410083585e-05,
238
+ "loss": 5.6636,
239
+ "step": 2500
240
+ },
241
+ {
242
+ "epoch": 1.852307144129699,
243
+ "grad_norm": 47.173553466796875,
244
+ "learning_rate": 4.3785386896737665e-05,
245
+ "loss": 5.5392,
246
+ "step": 2600
247
+ },
248
+ {
249
+ "epoch": 1.923570283270978,
250
+ "grad_norm": 52.33578872680664,
251
+ "learning_rate": 4.108924238339175e-05,
252
+ "loss": 5.6191,
253
+ "step": 2700
254
+ },
255
+ {
256
+ "epoch": 1.9948334224122573,
257
+ "grad_norm": 76.79695892333984,
258
+ "learning_rate": 3.839309787004584e-05,
259
+ "loss": 5.2035,
260
+ "step": 2800
261
+ },
262
+ {
263
+ "epoch": 1.9948334224122573,
264
+ "eval_loss": 0.11128760129213333,
265
+ "eval_runtime": 157.0495,
266
+ "eval_samples_per_second": 3.184,
267
+ "eval_steps_per_second": 0.401,
268
+ "eval_wer": 0.23946360153256704,
269
+ "step": 2800
270
  }
271
  ],
272
  "logging_steps": 100,
 
286
  "attributes": {}
287
  }
288
  },
289
+ "total_flos": 3.64112116973568e+18,
290
  "train_batch_size": 8,
291
  "trial_name": null,
292
  "trial_params": null