Baselhany committed on
Commit
d28c8a1
·
verified ·
1 Parent(s): 4fec15e

Training in progress, step 29000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e9c5b5c507c25afe3cf4d5aec82c7836af315794f646ccd78331b1ef69b9976
3
  size 223144592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec88174490cfeceb1eeed4a5fd12e3ce8c444f5222aa1c0363ca14c82aca47ab
3
  size 223144592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f37dd8023aeda17192fa3cd957ae713351d08b56ea14a8b05ae51168a5029d04
3
  size 281574266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a72f5c17a9f73a7ce9c5f0a27e6dbf2b25e9148a69728ce4404d21158eea6b0
3
  size 281574266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2940908599d69a8410ea56cae7b048228912580fd3318d429840a0c58b1c925b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9b02826eed8232c5d6f49534a02071ef467235ea90c4dc0efe574539d40a7df
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbd563d83932cc7d0da7780f68883324db1a55e2e28771f70432de2590ff8acb
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8138f4edda6774d231042061422daa95c14292f7d8422fb63a164ad1a3d05072
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a62d34f5456d530d45292bdc7a7b57a95952306ee03d49e82ed6ce0cb924518d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:632f25ed3184110f036602f4425c7c10f68cae7f3c7d7478f62304f907d5484f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 28000,
3
  "best_metric": 0.18110816386678455,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-28000",
5
- "epoch": 16.403046280023432,
6
  "eval_steps": 1000,
7
- "global_step": 28000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2220,6 +2220,85 @@
2220
  "eval_steps_per_second": 0.425,
2221
  "eval_wer": 0.18110816386678455,
2222
  "step": 28000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2223
  }
2224
  ],
2225
  "logging_steps": 100,
@@ -2239,7 +2318,7 @@
2239
  "attributes": {}
2240
  }
2241
  },
2242
- "total_flos": 3.642039874879488e+19,
2243
  "train_batch_size": 8,
2244
  "trial_name": null,
2245
  "trial_params": null
 
2
  "best_global_step": 28000,
3
  "best_metric": 0.18110816386678455,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-28000",
5
+ "epoch": 16.988869361452842,
6
  "eval_steps": 1000,
7
+ "global_step": 29000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2220
  "eval_steps_per_second": 0.425,
2221
  "eval_wer": 0.18110816386678455,
2222
  "step": 28000
2223
+ },
2224
+ {
2225
+ "epoch": 16.461628588166374,
2226
+ "grad_norm": 9.010910034179688,
2227
+ "learning_rate": 2.5554595995034276e-05,
2228
+ "loss": 0.7898,
2229
+ "step": 28100
2230
+ },
2231
+ {
2232
+ "epoch": 16.520210896309315,
2233
+ "grad_norm": 11.093189239501953,
2234
+ "learning_rate": 2.5284719598423923e-05,
2235
+ "loss": 0.7682,
2236
+ "step": 28200
2237
+ },
2238
+ {
2239
+ "epoch": 16.578793204452257,
2240
+ "grad_norm": 7.964006423950195,
2241
+ "learning_rate": 2.501484320181357e-05,
2242
+ "loss": 0.7481,
2243
+ "step": 28300
2244
+ },
2245
+ {
2246
+ "epoch": 16.637375512595195,
2247
+ "grad_norm": 10.423765182495117,
2248
+ "learning_rate": 2.474496680520322e-05,
2249
+ "loss": 0.7515,
2250
+ "step": 28400
2251
+ },
2252
+ {
2253
+ "epoch": 16.695957820738137,
2254
+ "grad_norm": 6.754664897918701,
2255
+ "learning_rate": 2.4475090408592866e-05,
2256
+ "loss": 0.8024,
2257
+ "step": 28500
2258
+ },
2259
+ {
2260
+ "epoch": 16.75454012888108,
2261
+ "grad_norm": 10.269820213317871,
2262
+ "learning_rate": 2.4205214011982513e-05,
2263
+ "loss": 0.7533,
2264
+ "step": 28600
2265
+ },
2266
+ {
2267
+ "epoch": 16.81312243702402,
2268
+ "grad_norm": 14.783397674560547,
2269
+ "learning_rate": 2.3935337615372163e-05,
2270
+ "loss": 0.7937,
2271
+ "step": 28700
2272
+ },
2273
+ {
2274
+ "epoch": 16.87170474516696,
2275
+ "grad_norm": 8.369039535522461,
2276
+ "learning_rate": 2.366546121876181e-05,
2277
+ "loss": 0.8246,
2278
+ "step": 28800
2279
+ },
2280
+ {
2281
+ "epoch": 16.9302870533099,
2282
+ "grad_norm": 8.320894241333008,
2283
+ "learning_rate": 2.3395584822151456e-05,
2284
+ "loss": 0.7703,
2285
+ "step": 28900
2286
+ },
2287
+ {
2288
+ "epoch": 16.988869361452842,
2289
+ "grad_norm": 10.491236686706543,
2290
+ "learning_rate": 2.3125708425541102e-05,
2291
+ "loss": 0.8261,
2292
+ "step": 29000
2293
+ },
2294
+ {
2295
+ "epoch": 16.988869361452842,
2296
+ "eval_loss": 0.0840698629617691,
2297
+ "eval_runtime": 145.4812,
2298
+ "eval_samples_per_second": 3.437,
2299
+ "eval_steps_per_second": 0.433,
2300
+ "eval_wer": 0.18493958149130563,
2301
+ "step": 29000
2302
  }
2303
  ],
2304
  "logging_steps": 100,
 
2318
  "attributes": {}
2319
  }
2320
  },
2321
+ "total_flos": 3.772122019135488e+19,
2322
  "train_batch_size": 8,
2323
  "trial_name": null,
2324
  "trial_params": null