ljcamargo commited on
Commit
dd2f9e9
·
verified ·
1 Parent(s): 944c0b1

Training in progress, step 2100, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bea1b48825d279d5ca7532312e7e81957e535191d5f4e4e23c6756d53ffb5dc5
3
  size 3826461296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f44b60552954a3eb7b67cd9bb47c074f5bb2012c7a637b3c8cc3d9edcf8944b3
3
  size 3826461296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc649b7fa91947a37cd4744fb1a38adf59d9a1c0676e9bc59a750dc67ad53fa6
3
  size 2479955235
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6cc5b7d6a2fac731ee6f768114be3787bc9d22aa5f028f118b2c784c9ffca1f
3
  size 2479955235
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53febc76262518d0519b05d74ab6f65dd5851f3bbee84bc1c2b8f6935b1f50de
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f13dd54935d4d1876d05824ed5aab8e787b691f2aec583b5a7e328fd2bead633
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d68747f6eb2bb192bc48db140d8e66025b016a51ccd2dd4f8273e6973eed04b3
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f860d9af78aedd57dafcb10a7d7e5d5d6fe980f28aaf3455f7dda455f8cb9c1
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.8,
6
  "eval_steps": 500,
7
- "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1408,6 +1408,76 @@
1408
  "learning_rate": 1.0128617363344052e-05,
1409
  "loss": 0.2778,
1410
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1411
  }
1412
  ],
1413
  "logging_steps": 10,
@@ -1427,7 +1497,7 @@
1427
  "attributes": {}
1428
  }
1429
  },
1430
- "total_flos": 3.604261231669248e+16,
1431
  "train_batch_size": 2,
1432
  "trial_name": null,
1433
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.84,
6
  "eval_steps": 500,
7
+ "global_step": 2100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1408
  "learning_rate": 1.0128617363344052e-05,
1409
  "loss": 0.2778,
1410
  "step": 2000
1411
+ },
1412
+ {
1413
+ "epoch": 0.804,
1414
+ "grad_norm": 6.987481594085693,
1415
+ "learning_rate": 9.927652733118971e-06,
1416
+ "loss": 0.2778,
1417
+ "step": 2010
1418
+ },
1419
+ {
1420
+ "epoch": 0.808,
1421
+ "grad_norm": 11.547746658325195,
1422
+ "learning_rate": 9.726688102893891e-06,
1423
+ "loss": 0.3181,
1424
+ "step": 2020
1425
+ },
1426
+ {
1427
+ "epoch": 0.812,
1428
+ "grad_norm": 7.187608242034912,
1429
+ "learning_rate": 9.525723472668812e-06,
1430
+ "loss": 0.3211,
1431
+ "step": 2030
1432
+ },
1433
+ {
1434
+ "epoch": 0.816,
1435
+ "grad_norm": 14.975872039794922,
1436
+ "learning_rate": 9.32475884244373e-06,
1437
+ "loss": 0.2335,
1438
+ "step": 2040
1439
+ },
1440
+ {
1441
+ "epoch": 0.82,
1442
+ "grad_norm": 5.20744514465332,
1443
+ "learning_rate": 9.123794212218651e-06,
1444
+ "loss": 0.3012,
1445
+ "step": 2050
1446
+ },
1447
+ {
1448
+ "epoch": 0.824,
1449
+ "grad_norm": 9.876429557800293,
1450
+ "learning_rate": 8.92282958199357e-06,
1451
+ "loss": 0.3095,
1452
+ "step": 2060
1453
+ },
1454
+ {
1455
+ "epoch": 0.828,
1456
+ "grad_norm": 7.847969055175781,
1457
+ "learning_rate": 8.72186495176849e-06,
1458
+ "loss": 0.3336,
1459
+ "step": 2070
1460
+ },
1461
+ {
1462
+ "epoch": 0.832,
1463
+ "grad_norm": 5.847342014312744,
1464
+ "learning_rate": 8.520900321543409e-06,
1465
+ "loss": 0.3471,
1466
+ "step": 2080
1467
+ },
1468
+ {
1469
+ "epoch": 0.836,
1470
+ "grad_norm": 12.866349220275879,
1471
+ "learning_rate": 8.319935691318329e-06,
1472
+ "loss": 0.5096,
1473
+ "step": 2090
1474
+ },
1475
+ {
1476
+ "epoch": 0.84,
1477
+ "grad_norm": 5.676148891448975,
1478
+ "learning_rate": 8.118971061093248e-06,
1479
+ "loss": 0.3028,
1480
+ "step": 2100
1481
  }
1482
  ],
1483
  "logging_steps": 10,
 
1497
  "attributes": {}
1498
  }
1499
  },
1500
+ "total_flos": 3.789296334928282e+16,
1501
  "train_batch_size": 2,
1502
  "trial_name": null,
1503
  "trial_params": null