Baselhany committed on
Commit
bee3bb0
·
verified ·
1 Parent(s): fb4d0cc

Training in progress, step 30000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3327b0b8af85925610a41cee7c7ae4963a7b7dde6cc08465efc2826ef5c0af2
3
  size 223144592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2105cf7a5c87ba982f87194bf389b220d816f9425c25ae107f042200ab4b16f2
3
  size 223144592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93bad43f4b8af45a608a4b225343bd214941a8efeb3c4c469a14c0c86c0ab957
3
  size 281574266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:051e18e97a9194053216c43776da56ec3eda3abdc1ab8afb6b1b39ca733cdb32
3
  size 281574266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4b9421ad0d5b73228617c1cf302e1d33ef76aeb1e4831b98c6e9c5c8cf44bf4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b5cc2e059bf5994720338ae5c6e6dec69e298afa0568c75f8adaffc4768726d
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c416cfd8c4766d6cc7a298c9543da4fe2068ac18114961cd2b770fab5f0668d7
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ba137982c64fa8b963ffa3da35dfd85bc4ae996706d0bfce9f15caad6a862fa
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68304b2d0c95d2b28b071e7dc26d0215ae7596f8485d75f9bb15fd5d82eef471
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4690afe34793ca3a29158fb149768d32cc1f45ba8e1ff250dc324b500662d817
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 12000,
3
  "best_metric": 0.1958443854995579,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-12000",
5
- "epoch": 17.190278601066982,
6
  "eval_steps": 1000,
7
- "global_step": 29000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2299,6 +2299,85 @@
2299
  "eval_steps_per_second": 0.426,
2300
  "eval_wer": 0.1989389920424403,
2301
  "step": 29000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2302
  }
2303
  ],
2304
  "logging_steps": 100,
@@ -2318,7 +2397,7 @@
2318
  "attributes": {}
2319
  }
2320
  },
2321
- "total_flos": 3.771898440450048e+19,
2322
  "train_batch_size": 8,
2323
  "trial_name": null,
2324
  "trial_params": null
 
2
  "best_global_step": 12000,
3
  "best_metric": 0.1958443854995579,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-12000",
5
+ "epoch": 17.783046828689983,
6
  "eval_steps": 1000,
7
+ "global_step": 30000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2299
  "eval_steps_per_second": 0.426,
2300
  "eval_wer": 0.1989389920424403,
2301
  "step": 29000
2302
+ },
2303
+ {
2304
+ "epoch": 17.249555423829282,
2305
+ "grad_norm": 6.731088638305664,
2306
+ "learning_rate": 4.292506529163597e-06,
2307
+ "loss": 0.9345,
2308
+ "step": 29100
2309
+ },
2310
+ {
2311
+ "epoch": 17.30883224659158,
2312
+ "grad_norm": 12.75145435333252,
2313
+ "learning_rate": 3.95767762673274e-06,
2314
+ "loss": 0.8958,
2315
+ "step": 29200
2316
+ },
2317
+ {
2318
+ "epoch": 17.36810906935388,
2319
+ "grad_norm": 6.983468055725098,
2320
+ "learning_rate": 3.6228487243018817e-06,
2321
+ "loss": 0.8837,
2322
+ "step": 29300
2323
+ },
2324
+ {
2325
+ "epoch": 17.42738589211618,
2326
+ "grad_norm": 6.845942974090576,
2327
+ "learning_rate": 3.288019821871024e-06,
2328
+ "loss": 0.8323,
2329
+ "step": 29400
2330
+ },
2331
+ {
2332
+ "epoch": 17.48666271487848,
2333
+ "grad_norm": 7.515448570251465,
2334
+ "learning_rate": 2.9531909194401663e-06,
2335
+ "loss": 0.8625,
2336
+ "step": 29500
2337
+ },
2338
+ {
2339
+ "epoch": 17.545939537640784,
2340
+ "grad_norm": 6.841838836669922,
2341
+ "learning_rate": 2.6183620170093084e-06,
2342
+ "loss": 0.8454,
2343
+ "step": 29600
2344
+ },
2345
+ {
2346
+ "epoch": 17.605216360403084,
2347
+ "grad_norm": 16.295787811279297,
2348
+ "learning_rate": 2.2835331145784505e-06,
2349
+ "loss": 0.8664,
2350
+ "step": 29700
2351
+ },
2352
+ {
2353
+ "epoch": 17.664493183165384,
2354
+ "grad_norm": 7.235702991485596,
2355
+ "learning_rate": 1.9487042121475926e-06,
2356
+ "loss": 0.8713,
2357
+ "step": 29800
2358
+ },
2359
+ {
2360
+ "epoch": 17.723770005927683,
2361
+ "grad_norm": 7.121592998504639,
2362
+ "learning_rate": 1.6138753097167347e-06,
2363
+ "loss": 0.8724,
2364
+ "step": 29900
2365
+ },
2366
+ {
2367
+ "epoch": 17.783046828689983,
2368
+ "grad_norm": 7.38678503036499,
2369
+ "learning_rate": 1.279046407285877e-06,
2370
+ "loss": 0.8303,
2371
+ "step": 30000
2372
+ },
2373
+ {
2374
+ "epoch": 17.783046828689983,
2375
+ "eval_loss": 0.08706488460302353,
2376
+ "eval_runtime": 146.9212,
2377
+ "eval_samples_per_second": 3.403,
2378
+ "eval_steps_per_second": 0.429,
2379
+ "eval_wer": 0.20218096080165046,
2380
+ "step": 30000
2381
  }
2382
  ],
2383
  "logging_steps": 100,
 
2397
  "attributes": {}
2398
  }
2399
  },
2400
+ "total_flos": 3.901980584706048e+19,
2401
  "train_batch_size": 8,
2402
  "trial_name": null,
2403
  "trial_params": null