Baselhany commited on
Commit
4ceca94
·
verified ·
1 Parent(s): dd1d156

Training in progress, step 17000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3e3cd7750a71ba24a73246f69fdf7daec1177ea5853eea232a257d18883c36c
3
  size 223144592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41d75042039ec4d32876afbd52589e36b4560c805379cab200f3ce3db89e6b04
3
  size 223144592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1da40d74928c909c113322ad282f56feadf39a270ef5d886cdca23750487bd7a
3
  size 281574266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f06215d1e625b605955176334bf3e5f8df0e088cf46977fbc17da848149bd68
3
  size 281574266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d89c80e2c9bcd130c179737ff89fd355d5633e2baca37c40e9a81d122a9d5d9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94266a02ae7a39354e0a8bc897bc6f4b9fd0120fa08f142ab755db626dfd5a68
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:281dc54ed0520d353628c056d22e94e782117e47679a149519cb09d64d5041fb
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8559dd5151eb91b4bc1c697f71b54aac56e37d941b8e4e5b6323525a70bc632e
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e08be80793261ac03b00ebc0b1eac4cdf6646c0ac612f93a04f7f6b012b5292c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50324fb65c01951e10fb31eef598ae7506e9fbf9305b5c6400eda15379c4a48e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 14000,
3
  "best_metric": 0.18538166814028884,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-14000",
5
- "epoch": 9.373169302870533,
6
  "eval_steps": 1000,
7
- "global_step": 16000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1272,6 +1272,85 @@
1272
  "eval_steps_per_second": 0.418,
1273
  "eval_wer": 0.19820218096080164,
1274
  "step": 16000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1275
  }
1276
  ],
1277
  "logging_steps": 100,
@@ -1291,7 +1370,7 @@
1291
  "attributes": {}
1292
  }
1293
  },
1294
- "total_flos": 2.081167965683712e+19,
1295
  "train_batch_size": 8,
1296
  "trial_name": null,
1297
  "trial_params": null
 
2
  "best_global_step": 14000,
3
  "best_metric": 0.18538166814028884,
4
  "best_model_checkpoint": "./distil-whisper/checkpoint-14000",
5
+ "epoch": 9.958992384299941,
6
  "eval_steps": 1000,
7
+ "global_step": 17000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1272
  "eval_steps_per_second": 0.418,
1273
  "eval_wer": 0.19820218096080164,
1274
  "step": 16000
1275
+ },
1276
+ {
1277
+ "epoch": 9.431751611013475,
1278
+ "grad_norm": 5.505492687225342,
1279
+ "learning_rate": 5.914302957151479e-06,
1280
+ "loss": 0.7991,
1281
+ "step": 16100
1282
+ },
1283
+ {
1284
+ "epoch": 9.490333919156415,
1285
+ "grad_norm": 6.690750598907471,
1286
+ "learning_rate": 5.310802655401328e-06,
1287
+ "loss": 0.8071,
1288
+ "step": 16200
1289
+ },
1290
+ {
1291
+ "epoch": 9.548916227299356,
1292
+ "grad_norm": 6.654877185821533,
1293
+ "learning_rate": 4.707302353651177e-06,
1294
+ "loss": 0.834,
1295
+ "step": 16300
1296
+ },
1297
+ {
1298
+ "epoch": 9.607498535442296,
1299
+ "grad_norm": 9.937077522277832,
1300
+ "learning_rate": 4.1038020519010266e-06,
1301
+ "loss": 0.8055,
1302
+ "step": 16400
1303
+ },
1304
+ {
1305
+ "epoch": 9.666080843585238,
1306
+ "grad_norm": 6.015642166137695,
1307
+ "learning_rate": 3.500301750150875e-06,
1308
+ "loss": 0.8734,
1309
+ "step": 16500
1310
+ },
1311
+ {
1312
+ "epoch": 9.724663151728178,
1313
+ "grad_norm": 12.434464454650879,
1314
+ "learning_rate": 2.896801448400724e-06,
1315
+ "loss": 0.8544,
1316
+ "step": 16600
1317
+ },
1318
+ {
1319
+ "epoch": 9.783245459871118,
1320
+ "grad_norm": 6.330708980560303,
1321
+ "learning_rate": 2.2933011466505732e-06,
1322
+ "loss": 0.798,
1323
+ "step": 16700
1324
+ },
1325
+ {
1326
+ "epoch": 9.84182776801406,
1327
+ "grad_norm": 5.820682048797607,
1328
+ "learning_rate": 1.6898008449004227e-06,
1329
+ "loss": 0.7989,
1330
+ "step": 16800
1331
+ },
1332
+ {
1333
+ "epoch": 9.900410076157002,
1334
+ "grad_norm": 8.209725379943848,
1335
+ "learning_rate": 1.0863005431502715e-06,
1336
+ "loss": 0.8454,
1337
+ "step": 16900
1338
+ },
1339
+ {
1340
+ "epoch": 9.958992384299941,
1341
+ "grad_norm": 10.676623344421387,
1342
+ "learning_rate": 4.828002414001208e-07,
1343
+ "loss": 0.8034,
1344
+ "step": 17000
1345
+ },
1346
+ {
1347
+ "epoch": 9.958992384299941,
1348
+ "eval_loss": 0.08400186896324158,
1349
+ "eval_runtime": 148.9273,
1350
+ "eval_samples_per_second": 3.357,
1351
+ "eval_steps_per_second": 0.423,
1352
+ "eval_wer": 0.197465369879163,
1353
+ "step": 17000
1354
  }
1355
  ],
1356
  "logging_steps": 100,
 
1370
  "attributes": {}
1371
  }
1372
  },
1373
+ "total_flos": 2.211250109939712e+19,
1374
  "train_batch_size": 8,
1375
  "trial_name": null,
1376
  "trial_params": null