Baselhany commited on
Commit
75a8a65
·
verified ·
1 Parent(s): 370eea0

Training in progress, step 30000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec88174490cfeceb1eeed4a5fd12e3ce8c444f5222aa1c0363ca14c82aca47ab
3
  size 223144592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aac9fb703442293b19e405bf4164788bf426a64df5cc82596cf5e220aaa680b7
3
  size 223144592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a72f5c17a9f73a7ce9c5f0a27e6dbf2b25e9148a69728ce4404d21158eea6b0
3
  size 281574266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d2a33825f6a5fff2b41693fbcc0f91ce6163ef73cdbd1c03e6554d0b82c5f42
3
  size 281574266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9b02826eed8232c5d6f49534a02071ef467235ea90c4dc0efe574539d40a7df
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b5cc2e059bf5994720338ae5c6e6dec69e298afa0568c75f8adaffc4768726d
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8138f4edda6774d231042061422daa95c14292f7d8422fb63a164ad1a3d05072
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6d46aec3833efc2c92b1486514727c87c4f32a04aeafab142f4a8c156f76f8d
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:632f25ed3184110f036602f4425c7c10f68cae7f3c7d7478f62304f907d5484f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ba53b40b2a30a25e436425c9258142131e66d9a345f8a16e78444d8fcb29696
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 28000,
3
  "best_metric": 0.18110816386678455,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-28000",
5
- "epoch": 16.988869361452842,
6
  "eval_steps": 1000,
7
- "global_step": 29000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2299,6 +2299,85 @@
2299
  "eval_steps_per_second": 0.433,
2300
  "eval_wer": 0.18493958149130563,
2301
  "step": 29000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2302
  }
2303
  ],
2304
  "logging_steps": 100,
@@ -2318,7 +2397,7 @@
2318
  "attributes": {}
2319
  }
2320
  },
2321
- "total_flos": 3.772122019135488e+19,
2322
  "train_batch_size": 8,
2323
  "trial_name": null,
2324
  "trial_params": null
 
2
  "best_global_step": 28000,
3
  "best_metric": 0.18110816386678455,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-28000",
5
+ "epoch": 17.57469244288225,
6
  "eval_steps": 1000,
7
+ "global_step": 30000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2299
  "eval_steps_per_second": 0.433,
2300
  "eval_wer": 0.18493958149130563,
2301
  "step": 29000
2302
+ },
2303
+ {
2304
+ "epoch": 17.047451669595784,
2305
+ "grad_norm": 6.7529377937316895,
2306
+ "learning_rate": 2.2855832028930752e-05,
2307
+ "loss": 0.8091,
2308
+ "step": 29100
2309
+ },
2310
+ {
2311
+ "epoch": 17.106033977738722,
2312
+ "grad_norm": 6.652218341827393,
2313
+ "learning_rate": 2.25859556323204e-05,
2314
+ "loss": 0.7695,
2315
+ "step": 29200
2316
+ },
2317
+ {
2318
+ "epoch": 17.164616285881664,
2319
+ "grad_norm": 7.654794692993164,
2320
+ "learning_rate": 2.2316079235710046e-05,
2321
+ "loss": 0.7926,
2322
+ "step": 29300
2323
+ },
2324
+ {
2325
+ "epoch": 17.223198594024606,
2326
+ "grad_norm": 8.277422904968262,
2327
+ "learning_rate": 2.2046202839099696e-05,
2328
+ "loss": 0.7702,
2329
+ "step": 29400
2330
+ },
2331
+ {
2332
+ "epoch": 17.281780902167544,
2333
+ "grad_norm": 15.797304153442383,
2334
+ "learning_rate": 2.1776326442489342e-05,
2335
+ "loss": 0.7597,
2336
+ "step": 29500
2337
+ },
2338
+ {
2339
+ "epoch": 17.340363210310485,
2340
+ "grad_norm": 6.769285202026367,
2341
+ "learning_rate": 2.150645004587899e-05,
2342
+ "loss": 0.7588,
2343
+ "step": 29600
2344
+ },
2345
+ {
2346
+ "epoch": 17.398945518453427,
2347
+ "grad_norm": 8.328302383422852,
2348
+ "learning_rate": 2.123657364926864e-05,
2349
+ "loss": 0.7719,
2350
+ "step": 29700
2351
+ },
2352
+ {
2353
+ "epoch": 17.45752782659637,
2354
+ "grad_norm": 5.7514190673828125,
2355
+ "learning_rate": 2.0966697252658282e-05,
2356
+ "loss": 0.7985,
2357
+ "step": 29800
2358
+ },
2359
+ {
2360
+ "epoch": 17.516110134739307,
2361
+ "grad_norm": 5.553383827209473,
2362
+ "learning_rate": 2.069682085604793e-05,
2363
+ "loss": 0.7602,
2364
+ "step": 29900
2365
+ },
2366
+ {
2367
+ "epoch": 17.57469244288225,
2368
+ "grad_norm": 7.833782196044922,
2369
+ "learning_rate": 2.042694445943758e-05,
2370
+ "loss": 0.7531,
2371
+ "step": 30000
2372
+ },
2373
+ {
2374
+ "epoch": 17.57469244288225,
2375
+ "eval_loss": 0.08400005102157593,
2376
+ "eval_runtime": 147.1645,
2377
+ "eval_samples_per_second": 3.398,
2378
+ "eval_steps_per_second": 0.428,
2379
+ "eval_wer": 0.18670792808723843,
2380
+ "step": 30000
2381
  }
2382
  ],
2383
  "logging_steps": 100,
 
2397
  "attributes": {}
2398
  }
2399
  },
2400
+ "total_flos": 3.902187903123456e+19,
2401
  "train_batch_size": 8,
2402
  "trial_name": null,
2403
  "trial_params": null