mgh6 commited on
Commit
be0df76
·
verified ·
1 Parent(s): a6d209a

Training in progress, epoch 13, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9273b0b5b72839b135e5ba3d001ba6d7289e641b1303d533d3f16dcafb32b4ac
3
  size 2682482800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e47f1cdc6bb30de3a42755c5e856705a84b91db75b58534b95cf4e4f4bf5059
3
  size 2682482800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f523e103b2aa541067a3a863044f1b45be43e5156c2986397eee07006ae38a5
3
  size 5365108834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12fb571ddaddabec29d8df0695efe3813dca0abe64dc223bcca27d75770eef43
3
  size 5365108834
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f1e7cc261bf54ef1e547376788ce8abb284b4d32ff4f5a42cd1d1aecd5e3670
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:613d74de19e0fb35eeea5689475f5e1332f3b307a0a9c8eeaa1e3e8d8c5fe1aa
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac558eb50fb0d07f8ffcf509322d18bbb69a3f968f832820a9d4adbe07047818
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:962ad854d2adfe879aa22d99e411b4b35f04f8c2df9821494bc2d1fe0b1197ed
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.997999636297509,
5
  "eval_steps": 50,
6
- "global_step": 4459,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1342,6 +1342,111 @@
1342
  "eval_samples_per_second": 41.463,
1343
  "eval_steps_per_second": 20.731,
1344
  "step": 4450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1345
  }
1346
  ],
1347
  "logging_steps": 50,
@@ -1361,7 +1466,7 @@
1361
  "attributes": {}
1362
  }
1363
  },
1364
- "total_flos": 1.1614435613679288e+18,
1365
  "train_batch_size": 2,
1366
  "trial_name": null,
1367
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.997999636297509,
5
  "eval_steps": 50,
6
+ "global_step": 4802,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1342
  "eval_samples_per_second": 41.463,
1343
  "eval_steps_per_second": 20.731,
1344
  "step": 4450
1345
+ },
1346
+ {
1347
+ "epoch": 13.119294417166758,
1348
+ "grad_norm": 64.73822021484375,
1349
+ "learning_rate": 3.4402332361516035e-05,
1350
+ "loss": 0.7291,
1351
+ "step": 4500
1352
+ },
1353
+ {
1354
+ "epoch": 13.119294417166758,
1355
+ "eval_loss": 0.705399751663208,
1356
+ "eval_runtime": 116.3819,
1357
+ "eval_samples_per_second": 41.467,
1358
+ "eval_steps_per_second": 20.733,
1359
+ "step": 4500
1360
+ },
1361
+ {
1362
+ "epoch": 13.264775413711584,
1363
+ "grad_norm": 48.628440856933594,
1364
+ "learning_rate": 3.36734693877551e-05,
1365
+ "loss": 0.726,
1366
+ "step": 4550
1367
+ },
1368
+ {
1369
+ "epoch": 13.264775413711584,
1370
+ "eval_loss": 0.6991727352142334,
1371
+ "eval_runtime": 116.4083,
1372
+ "eval_samples_per_second": 41.458,
1373
+ "eval_steps_per_second": 20.729,
1374
+ "step": 4550
1375
+ },
1376
+ {
1377
+ "epoch": 13.41025641025641,
1378
+ "grad_norm": 42.37076187133789,
1379
+ "learning_rate": 3.294460641399417e-05,
1380
+ "loss": 0.7257,
1381
+ "step": 4600
1382
+ },
1383
+ {
1384
+ "epoch": 13.41025641025641,
1385
+ "eval_loss": 0.6997016668319702,
1386
+ "eval_runtime": 116.654,
1387
+ "eval_samples_per_second": 41.37,
1388
+ "eval_steps_per_second": 20.685,
1389
+ "step": 4600
1390
+ },
1391
+ {
1392
+ "epoch": 13.555737406801237,
1393
+ "grad_norm": 54.22138977050781,
1394
+ "learning_rate": 3.221574344023324e-05,
1395
+ "loss": 0.721,
1396
+ "step": 4650
1397
+ },
1398
+ {
1399
+ "epoch": 13.555737406801237,
1400
+ "eval_loss": 0.6972126960754395,
1401
+ "eval_runtime": 116.6938,
1402
+ "eval_samples_per_second": 41.356,
1403
+ "eval_steps_per_second": 20.678,
1404
+ "step": 4650
1405
+ },
1406
+ {
1407
+ "epoch": 13.701218403346063,
1408
+ "grad_norm": 70.08407592773438,
1409
+ "learning_rate": 3.148688046647231e-05,
1410
+ "loss": 0.7219,
1411
+ "step": 4700
1412
+ },
1413
+ {
1414
+ "epoch": 13.701218403346063,
1415
+ "eval_loss": 0.697705864906311,
1416
+ "eval_runtime": 116.8913,
1417
+ "eval_samples_per_second": 41.286,
1418
+ "eval_steps_per_second": 20.643,
1419
+ "step": 4700
1420
+ },
1421
+ {
1422
+ "epoch": 13.84669939989089,
1423
+ "grad_norm": 59.16844177246094,
1424
+ "learning_rate": 3.0758017492711373e-05,
1425
+ "loss": 0.7206,
1426
+ "step": 4750
1427
+ },
1428
+ {
1429
+ "epoch": 13.84669939989089,
1430
+ "eval_loss": 0.6945058107376099,
1431
+ "eval_runtime": 117.2096,
1432
+ "eval_samples_per_second": 41.174,
1433
+ "eval_steps_per_second": 20.587,
1434
+ "step": 4750
1435
+ },
1436
+ {
1437
+ "epoch": 13.992180396435716,
1438
+ "grad_norm": 70.3475112915039,
1439
+ "learning_rate": 3.0029154518950437e-05,
1440
+ "loss": 0.7173,
1441
+ "step": 4800
1442
+ },
1443
+ {
1444
+ "epoch": 13.992180396435716,
1445
+ "eval_loss": 0.692059338092804,
1446
+ "eval_runtime": 116.7938,
1447
+ "eval_samples_per_second": 41.321,
1448
+ "eval_steps_per_second": 20.66,
1449
+ "step": 4800
1450
  }
1451
  ],
1452
  "logging_steps": 50,
 
1466
  "attributes": {}
1467
  }
1468
  },
1469
+ "total_flos": 1.250799370400432e+18,
1470
  "train_batch_size": 2,
1471
  "trial_name": null,
1472
  "trial_params": null