Baselhany committed on
Commit
8b65a13
·
verified ·
1 Parent(s): 475b60b

Training in progress, step 14000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7060279d45e7459b967e15db711d8cd9a1a17424e52d7e90ff35cb16d2ba87fb
3
  size 223144592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df99a406fd4e7a9f486934f8fc91ca5fea5fbeda134839a6397edae2c28f810d
3
  size 223144592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79848332ea3785016fa03281822579bfa2a3e4281c3070d1112792454b5a75b5
3
  size 281574266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c89b7f6ce34f4634f681a539266445fc82cf8655d87d75223f7f3a5be780b8d
3
  size 281574266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c380529cf06397e865a2ec4a90a8b0334d0b540ca21dea720996a4d58cbfe97
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4eb8b589bc0f9fc2ded989d3a40e07724301f383d8307d4ec1d41c66b9f6bfc8
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d13fec0d58a698a1588dfadf5d00d4fb69b490cb8fc2864fda3b2a2af6f17146
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe901f3488345253ab27036dffafec4bebac63e87632ab024a1c268eea13b76c
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45c8cf259ef238a9913feac351553475b48cc5d3f6153c06161cc934160ae3d6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7dd03a3ec49e4f6d8256968a34ec8adb177fe3d2cfce1011557149c70298286
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 13200,
3
  "best_metric": 0.20100206307102858,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-13200",
5
- "epoch": 9.692677712453234,
6
  "eval_steps": 400,
7
- "global_step": 13600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1266,6 +1266,43 @@
1266
  "eval_steps_per_second": 0.432,
1267
  "eval_wer": 0.20837017388741527,
1268
  "step": 13600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1269
  }
1270
  ],
1271
  "logging_steps": 100,
@@ -1285,7 +1322,7 @@
1285
  "attributes": {}
1286
  }
1287
  },
1288
- "total_flos": 1.768186261536768e+19,
1289
  "train_batch_size": 8,
1290
  "trial_name": null,
1291
  "trial_params": null
 
2
  "best_global_step": 13200,
3
  "best_metric": 0.20100206307102858,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-13200",
5
+ "epoch": 9.97773026901835,
6
  "eval_steps": 400,
7
+ "global_step": 14000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1266
  "eval_steps_per_second": 0.432,
1267
  "eval_wer": 0.20837017388741527,
1268
  "step": 13600
1269
+ },
1270
+ {
1271
+ "epoch": 9.763940851594512,
1272
+ "grad_norm": 26.37179183959961,
1273
+ "learning_rate": 2.549889135254989e-06,
1274
+ "loss": 1.546,
1275
+ "step": 13700
1276
+ },
1277
+ {
1278
+ "epoch": 9.835203990735792,
1279
+ "grad_norm": 17.592809677124023,
1280
+ "learning_rate": 1.8107908351810791e-06,
1281
+ "loss": 1.4815,
1282
+ "step": 13800
1283
+ },
1284
+ {
1285
+ "epoch": 9.906467129877072,
1286
+ "grad_norm": 14.37030029296875,
1287
+ "learning_rate": 1.0716925351071693e-06,
1288
+ "loss": 1.414,
1289
+ "step": 13900
1290
+ },
1291
+ {
1292
+ "epoch": 9.97773026901835,
1293
+ "grad_norm": 21.16905975341797,
1294
+ "learning_rate": 3.3259423503325944e-07,
1295
+ "loss": 1.4709,
1296
+ "step": 14000
1297
+ },
1298
+ {
1299
+ "epoch": 9.97773026901835,
1300
+ "eval_loss": 0.097068190574646,
1301
+ "eval_runtime": 145.6677,
1302
+ "eval_samples_per_second": 3.432,
1303
+ "eval_steps_per_second": 0.432,
1304
+ "eval_wer": 0.20439139404656645,
1305
+ "step": 14000
1306
  }
1307
  ],
1308
  "logging_steps": 100,
 
1322
  "attributes": {}
1323
  }
1324
  },
1325
+ "total_flos": 1.820219119239168e+19,
1326
  "train_batch_size": 8,
1327
  "trial_name": null,
1328
  "trial_params": null