Baselhany committed on
Commit
dfe4aff
·
verified ·
1 Parent(s): 3fc87f3

Training in progress, step 4000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69193aaf7ea71a75c6a53dabfa0a65ddf34dc8a499a127bc128b2aa53579f9fa
3
  size 223144592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03e9251a067bed69b21bc9690ad088ddf17d9ac223235998288adb3c1c31e717
3
  size 223144592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b8a538434890bc9b555c3d707ff536182a1ce9e72fd2075b1e4b25a5bfb7b54
3
  size 281574266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dd56076323262e092e8fb333e8cf217ac4791a1b18c896b25c674975daab33b
3
  size 281574266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d30d045d4139d3f55dfce92596184610f9efa25b6cf0587ae9fb8624b44b114d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fca3a6c28d869556336316e3c340a17810e0e98c5f965a843b6c555529cd4e4
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8dfdc3004aaa3171852de1590839deeeabef4dbb22c233c90f7014a88b5dd61
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b18eb86bc2b5eae896b5ecef9def5e77dce376ad484fc085505ee2339d389cbd
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93bedcf8402af55b095bf6d5c557a90f995c7c5096c156990bbf9a11e0c8faf9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63b1d9922aa71edcb1afa04ca1f0f8d6e132dcc1b7ce072f4614fb8854bdfe62
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 3200,
3
- "best_metric": 0.22575891541408782,
4
- "best_model_checkpoint": "./distil-whisper/checkpoint-3200",
5
- "epoch": 2.564404061998931,
6
  "eval_steps": 400,
7
- "global_step": 3600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -341,6 +341,43 @@
341
  "eval_steps_per_second": 0.399,
342
  "eval_wer": 0.23887415266725612,
343
  "step": 3600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  }
345
  ],
346
  "logging_steps": 100,
@@ -360,7 +397,7 @@
360
  "attributes": {}
361
  }
362
  },
363
- "total_flos": 4.68059945435136e+18,
364
  "train_batch_size": 8,
365
  "trial_name": null,
366
  "trial_params": null
 
1
  {
2
+ "best_global_step": 4000,
3
+ "best_metric": 0.22192749778956675,
4
+ "best_model_checkpoint": "./distil-whisper/checkpoint-4000",
5
+ "epoch": 2.849456618564048,
6
  "eval_steps": 400,
7
+ "global_step": 4000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
341
  "eval_steps_per_second": 0.399,
342
  "eval_wer": 0.23887415266725612,
343
  "step": 3600
344
+ },
345
+ {
346
+ "epoch": 2.63566720114021,
347
+ "grad_norm": 30.755069732666016,
348
+ "learning_rate": 1.4127797249932598e-05,
349
+ "loss": 4.1464,
350
+ "step": 3700
351
+ },
352
+ {
353
+ "epoch": 2.7069303402814895,
354
+ "grad_norm": 39.35337829589844,
355
+ "learning_rate": 1.1431652736586681e-05,
356
+ "loss": 4.0822,
357
+ "step": 3800
358
+ },
359
+ {
360
+ "epoch": 2.7781934794227685,
361
+ "grad_norm": 59.027923583984375,
362
+ "learning_rate": 8.735508223240766e-06,
363
+ "loss": 4.0861,
364
+ "step": 3900
365
+ },
366
+ {
367
+ "epoch": 2.849456618564048,
368
+ "grad_norm": 42.845191955566406,
369
+ "learning_rate": 6.03936370989485e-06,
370
+ "loss": 4.0903,
371
+ "step": 4000
372
+ },
373
+ {
374
+ "epoch": 2.849456618564048,
375
+ "eval_loss": 0.1093602254986763,
376
+ "eval_runtime": 154.6964,
377
+ "eval_samples_per_second": 3.232,
378
+ "eval_steps_per_second": 0.407,
379
+ "eval_wer": 0.22192749778956675,
380
+ "step": 4000
381
  }
382
  ],
383
  "logging_steps": 100,
 
397
  "attributes": {}
398
  }
399
  },
400
+ "total_flos": 5.20092803137536e+18,
401
  "train_batch_size": 8,
402
  "trial_name": null,
403
  "trial_params": null