Baselhany committed on
Commit
17d7f1b
·
verified ·
1 Parent(s): 678951e

Training in progress, step 3200, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07cc1e0dffcd80eac14f87d0d9619540d3f5cf1771ecd556cf377b27a876168c
3
  size 223144592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77c42fb89ca5e7eeacd8dee347614384694d8e88a850be6f81c4be67614baaea
3
  size 223144592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4654516b018d8c1d8ee7f380300e2cfab5a3d33f9a966babd44885140fecda5d
3
  size 281574266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d33bb8913e40252c03691681597e0dda6da0e5d3ecd015fac2bf0734e9edc3fd
3
  size 281574266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9590931db28029d3072811ed01a2ef3a2eb67fd96a33277147916dc1a126b69f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0b9074bf4ad76d06be1f1774df5d1b38e357952bea3c631cc895318d367b54d
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a378d0eab4cd862a8b75ae52dc72877722c788e485acf876520c8995544ef8d
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:124d751b7518ff7dfcb4911295c0150ebaf171d8ca363a18657a8580b6e7df96
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0d22202ff71180e976c20ff3356dc9e86ce8ae5966c95daf7b725e761a03663
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0560bb7d987b0d9f7026aea630b8dd8d632a86b445987fc463b416ee149d3af2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 2400,
3
- "best_metric": 0.2371058060713233,
4
- "best_model_checkpoint": "./distil-whisper/checkpoint-2400",
5
- "epoch": 1.9948334224122573,
6
  "eval_steps": 400,
7
- "global_step": 2800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -267,6 +267,43 @@
267
  "eval_steps_per_second": 0.401,
268
  "eval_wer": 0.23946360153256704,
269
  "step": 2800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  }
271
  ],
272
  "logging_steps": 100,
@@ -286,7 +323,7 @@
286
  "attributes": {}
287
  }
288
  },
289
- "total_flos": 3.64112116973568e+18,
290
  "train_batch_size": 8,
291
  "trial_name": null,
292
  "trial_params": null
 
1
  {
2
+ "best_global_step": 3200,
3
+ "best_metric": 0.22575891541408782,
4
+ "best_model_checkpoint": "./distil-whisper/checkpoint-3200",
5
+ "epoch": 2.2793515054338145,
6
  "eval_steps": 400,
7
+ "global_step": 3200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
267
  "eval_steps_per_second": 0.401,
268
  "eval_wer": 0.23946360153256704,
269
  "step": 2800
270
+ },
271
+ {
272
+ "epoch": 2.065562088009977,
273
+ "grad_norm": 41.731910705566406,
274
+ "learning_rate": 3.569695335669992e-05,
275
+ "loss": 4.2295,
276
+ "step": 2900
277
+ },
278
+ {
279
+ "epoch": 2.136825227151256,
280
+ "grad_norm": 67.33615112304688,
281
+ "learning_rate": 3.3000808843354005e-05,
282
+ "loss": 4.2595,
283
+ "step": 3000
284
+ },
285
+ {
286
+ "epoch": 2.208088366292535,
287
+ "grad_norm": 94.42093658447266,
288
+ "learning_rate": 3.030466433000809e-05,
289
+ "loss": 4.8801,
290
+ "step": 3100
291
+ },
292
+ {
293
+ "epoch": 2.2793515054338145,
294
+ "grad_norm": 31.315675735473633,
295
+ "learning_rate": 2.7608519816662175e-05,
296
+ "loss": 4.7207,
297
+ "step": 3200
298
+ },
299
+ {
300
+ "epoch": 2.2793515054338145,
301
+ "eval_loss": 0.11118239909410477,
302
+ "eval_runtime": 154.6651,
303
+ "eval_samples_per_second": 3.233,
304
+ "eval_steps_per_second": 0.407,
305
+ "eval_wer": 0.22575891541408782,
306
+ "step": 3200
307
  }
308
  ],
309
  "logging_steps": 100,
 
323
  "attributes": {}
324
  }
325
  },
326
+ "total_flos": 4.16027087732736e+18,
327
  "train_batch_size": 8,
328
  "trial_name": null,
329
  "trial_params": null