Baselhany committed on
Commit
e39a676
·
verified ·
1 Parent(s): e1b818a

Training in progress, step 24000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0bab03553a8f77e3f2e75a219bf665b1e186562895618ce139e5848329d72c0
3
  size 223144592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03838466207713eada91f2fe35ebdce379d78e8d934980b6a41a370b4b241fc4
3
  size 223144592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7360ae04b8536c8e7734abe26a3cb990db4674fc1a73c36fccd184dbe7d1cf2
3
  size 281574266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39381cb8b076341e0af800d6001aac00e14f538bf29b54baa76c4fc71f6897cf
3
  size 281574266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88042024e6d5504b01726176e8796950dc6789ee4ca9b044a81439e6b5885a63
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a545a607a6cfc5a7cda494aeb18574f85c33430e90245257ba8c7ea46b1621b
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75504dd6b832b35e51d6c4316ef57c81197d178d6353c029e3628a6d2fc3a886
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40d683cb85559512f55ce631fbe47e10637f39943ba3b0a1a0e4edcbc56eadf7
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa8d73ca82fcf012384d060c02e2014b86a7e07a27ad6eb60874128d1cc9f9d5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b664452bdc5abce0bae066b21b5f9a31052c253dc280d18e5bee7ce919977cb2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 4000,
3
  "best_metric": 0.18950781019746538,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-4000",
5
- "epoch": 13.989330171902786,
6
  "eval_steps": 400,
7
- "global_step": 23600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2191,6 +2191,43 @@
2191
  "eval_steps_per_second": 0.421,
2192
  "eval_wer": 0.20100206307102858,
2193
  "step": 23600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2194
  }
2195
  ],
2196
  "logging_steps": 100,
@@ -2210,7 +2247,7 @@
2210
  "attributes": {}
2211
  }
2212
  },
2213
- "total_flos": 3.069568683343872e+19,
2214
  "train_batch_size": 8,
2215
  "trial_name": null,
2216
  "trial_params": null
 
2
  "best_global_step": 4000,
3
  "best_metric": 0.18950781019746538,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-4000",
5
+ "epoch": 14.226437462951985,
6
  "eval_steps": 400,
7
+ "global_step": 24000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2191
  "eval_steps_per_second": 0.421,
2192
  "eval_wer": 0.20100206307102858,
2193
  "step": 23600
2194
+ },
2195
+ {
2196
+ "epoch": 14.048606994665086,
2197
+ "grad_norm": 9.336543083190918,
2198
+ "learning_rate": 6.5269098971981455e-06,
2199
+ "loss": 0.9795,
2200
+ "step": 23700
2201
+ },
2202
+ {
2203
+ "epoch": 14.107883817427386,
2204
+ "grad_norm": 7.267581462860107,
2205
+ "learning_rate": 6.1237653698851035e-06,
2206
+ "loss": 0.9808,
2207
+ "step": 23800
2208
+ },
2209
+ {
2210
+ "epoch": 14.167160640189685,
2211
+ "grad_norm": 11.863088607788086,
2212
+ "learning_rate": 5.720620842572062e-06,
2213
+ "loss": 0.8712,
2214
+ "step": 23900
2215
+ },
2216
+ {
2217
+ "epoch": 14.226437462951985,
2218
+ "grad_norm": 9.668085098266602,
2219
+ "learning_rate": 5.317476315259021e-06,
2220
+ "loss": 0.9332,
2221
+ "step": 24000
2222
+ },
2223
+ {
2224
+ "epoch": 14.226437462951985,
2225
+ "eval_loss": 0.08777602016925812,
2226
+ "eval_runtime": 147.5257,
2227
+ "eval_samples_per_second": 3.389,
2228
+ "eval_steps_per_second": 0.427,
2229
+ "eval_wer": 0.19923371647509577,
2230
+ "step": 24000
2231
  }
2232
  ],
2233
  "logging_steps": 100,
 
2247
  "attributes": {}
2248
  }
2249
  },
2250
+ "total_flos": 3.121573085577216e+19,
2251
  "train_batch_size": 8,
2252
  "trial_name": null,
2253
  "trial_params": null