Baselhany commited on
Commit
8465281
·
verified ·
1 Parent(s): a90393d

Training in progress, step 51000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e1784f1d25af07b45e862c4a0f18d2f6d0549046f3f54afeb55faa861b3b968
3
  size 223144592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3319e82e3a312fa6faba8a237003ca93f3ecdd36457f19215227deb690e99af7
3
  size 223144592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c09d09bfb770e147d6891f3302213053e1a022a24af4a2d9430678fca53c5889
3
  size 281574266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa51d384b7f561bf1e67da73024206fffa4e8dda22efaad2fab35ebc2b490555
3
  size 281574266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc163130310cc0fad2fddcc38714d51f6915d3f47e6c614dafc06aed2feeb570
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77f7aee25049de87d8cdf4f74b8e35dd47f80345ce100bfb1c453db7271d2eae
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4594d66c70672d97baa16e5f06b02f70c6e8738e3310267169ff9f456018b9c5
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa84d3b0b595baf750d6ab584421db48e3a346e0b07a20e76b7ce6ee4aa3ec8b
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97740748b07ae07da4d66ff3c5cb91ade92139aba050688e69efb5834ada4c0a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:242d37357dc73797e49eb5568ec9ca45b32042b44f1e8681ac659f27ecd237fb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 28000,
3
  "best_metric": 0.18110816386678455,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-28000",
5
- "epoch": 29.291154071470416,
6
  "eval_steps": 1000,
7
- "global_step": 50000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3958,6 +3958,85 @@
3958
  "eval_steps_per_second": 0.431,
3959
  "eval_wer": 0.18773946360153257,
3960
  "step": 50000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3961
  }
3962
  ],
3963
  "logging_steps": 100,
@@ -3977,7 +4056,7 @@
3977
  "attributes": {}
3978
  }
3979
  },
3980
- "total_flos": 6.503635665027072e+19,
3981
  "train_batch_size": 8,
3982
  "trial_name": null,
3983
  "trial_params": null
 
2
  "best_global_step": 28000,
3
  "best_metric": 0.18110816386678455,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-28000",
5
+ "epoch": 29.876977152899823,
6
  "eval_steps": 1000,
7
+ "global_step": 51000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3958
  "eval_steps_per_second": 0.431,
3959
  "eval_wer": 0.18773946360153257,
3960
  "step": 50000
3961
+ },
3962
+ {
3963
+ "epoch": 29.349736379613358,
3964
+ "grad_norm": 5.5835652351379395,
3965
+ "learning_rate": 2.234273318872018e-06,
3966
+ "loss": 0.633,
3967
+ "step": 50100
3968
+ },
3969
+ {
3970
+ "epoch": 29.408318687756296,
3971
+ "grad_norm": 5.577920436859131,
3972
+ "learning_rate": 2.0390455531453363e-06,
3973
+ "loss": 0.6456,
3974
+ "step": 50200
3975
+ },
3976
+ {
3977
+ "epoch": 29.466900995899238,
3978
+ "grad_norm": 6.7036566734313965,
3979
+ "learning_rate": 1.8418457897850522e-06,
3980
+ "loss": 0.6582,
3981
+ "step": 50300
3982
+ },
3983
+ {
3984
+ "epoch": 29.52548330404218,
3985
+ "grad_norm": 8.638516426086426,
3986
+ "learning_rate": 1.6446460264247684e-06,
3987
+ "loss": 0.6469,
3988
+ "step": 50400
3989
+ },
3990
+ {
3991
+ "epoch": 29.58406561218512,
3992
+ "grad_norm": 7.847275257110596,
3993
+ "learning_rate": 1.4474462630644845e-06,
3994
+ "loss": 0.6314,
3995
+ "step": 50500
3996
+ },
3997
+ {
3998
+ "epoch": 29.64264792032806,
3999
+ "grad_norm": 4.795884609222412,
4000
+ "learning_rate": 1.2502464997042004e-06,
4001
+ "loss": 0.6355,
4002
+ "step": 50600
4003
+ },
4004
+ {
4005
+ "epoch": 29.701230228471,
4006
+ "grad_norm": 5.026218414306641,
4007
+ "learning_rate": 1.0530467363439164e-06,
4008
+ "loss": 0.6476,
4009
+ "step": 50700
4010
+ },
4011
+ {
4012
+ "epoch": 29.759812536613943,
4013
+ "grad_norm": 6.32857608795166,
4014
+ "learning_rate": 8.558469729836324e-07,
4015
+ "loss": 0.6515,
4016
+ "step": 50800
4017
+ },
4018
+ {
4019
+ "epoch": 29.818394844756885,
4020
+ "grad_norm": 8.374032974243164,
4021
+ "learning_rate": 6.586472096233485e-07,
4022
+ "loss": 0.6615,
4023
+ "step": 50900
4024
+ },
4025
+ {
4026
+ "epoch": 29.876977152899823,
4027
+ "grad_norm": 8.55248737335205,
4028
+ "learning_rate": 4.6144744626306455e-07,
4029
+ "loss": 0.6645,
4030
+ "step": 51000
4031
+ },
4032
+ {
4033
+ "epoch": 29.876977152899823,
4034
+ "eval_loss": 0.08314584940671921,
4035
+ "eval_runtime": 145.2309,
4036
+ "eval_samples_per_second": 3.443,
4037
+ "eval_steps_per_second": 0.434,
4038
+ "eval_wer": 0.18980253463012084,
4039
+ "step": 51000
4040
  }
4041
  ],
4042
  "logging_steps": 100,
 
4056
  "attributes": {}
4057
  }
4058
  },
4059
+ "total_flos": 6.633717809283072e+19,
4060
  "train_batch_size": 8,
4061
  "trial_name": null,
4062
  "trial_params": null