mgh6 commited on
Commit
b61a9e7
·
verified ·
1 Parent(s): 39cba97

Training in progress, epoch 15, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e19f56a69991588cb05378ec22af5531354d51618ec45fbad1470b122a478388
3
  size 2682482800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66200acd6c20b3e5f776dd34d1dce04a462cff382acb5ae4670ee560518aaaf9
3
  size 2682482800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d70aa541c9d83a3fda726ee09a4073e42bbb9f06c77938a3a47dfd850c4aea4b
3
  size 5365108834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d55fa5ee514060d7787608dec3a888a028a7841ddb0bd524977d24948976b24e
3
  size 5365108834
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efabaf885831c104861d60c36c90e19e984eff0917f39feee04c399b9ccb139a
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc091a107ead0fd888fc4fea93ea268674953a0350c819218e401bfc73856e7d
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80c2e7e0c05972926e0ae596907bef103bc8973ac4008bedc0435f6468576df4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89399495261c74d8cccbea129b463394fdf91e1c31568f5b62fa945b79a3648c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.997999636297509,
5
  "eval_steps": 50,
6
- "global_step": 5145,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1537,6 +1537,111 @@
1537
  "eval_samples_per_second": 41.361,
1538
  "eval_steps_per_second": 20.681,
1539
  "step": 5100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1540
  }
1541
  ],
1542
  "logging_steps": 50,
@@ -1556,7 +1661,7 @@
1556
  "attributes": {}
1557
  }
1558
  },
1559
- "total_flos": 1.3400806487312302e+18,
1560
  "train_batch_size": 2,
1561
  "trial_name": null,
1562
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 15.997999636297509,
5
  "eval_steps": 50,
6
+ "global_step": 5488,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1537
  "eval_samples_per_second": 41.361,
1538
  "eval_steps_per_second": 20.681,
1539
  "step": 5100
1540
+ },
1541
+ {
1542
+ "epoch": 15.014548099654483,
1543
+ "grad_norm": 63.07834243774414,
1544
+ "learning_rate": 2.492711370262391e-05,
1545
+ "loss": 0.7085,
1546
+ "step": 5150
1547
+ },
1548
+ {
1549
+ "epoch": 15.014548099654483,
1550
+ "eval_loss": 0.6828727722167969,
1551
+ "eval_runtime": 117.6601,
1552
+ "eval_samples_per_second": 41.016,
1553
+ "eval_steps_per_second": 20.508,
1554
+ "step": 5150
1555
+ },
1556
+ {
1557
+ "epoch": 15.16002909619931,
1558
+ "grad_norm": 47.388702392578125,
1559
+ "learning_rate": 2.4198250728862976e-05,
1560
+ "loss": 0.6936,
1561
+ "step": 5200
1562
+ },
1563
+ {
1564
+ "epoch": 15.16002909619931,
1565
+ "eval_loss": 0.6810610294342041,
1566
+ "eval_runtime": 116.7247,
1567
+ "eval_samples_per_second": 41.345,
1568
+ "eval_steps_per_second": 20.673,
1569
+ "step": 5200
1570
+ },
1571
+ {
1572
+ "epoch": 15.305510092744136,
1573
+ "grad_norm": 52.29343032836914,
1574
+ "learning_rate": 2.3469387755102043e-05,
1575
+ "loss": 0.6988,
1576
+ "step": 5250
1577
+ },
1578
+ {
1579
+ "epoch": 15.305510092744136,
1580
+ "eval_loss": 0.6771013736724854,
1581
+ "eval_runtime": 116.5346,
1582
+ "eval_samples_per_second": 41.413,
1583
+ "eval_steps_per_second": 20.706,
1584
+ "step": 5250
1585
+ },
1586
+ {
1587
+ "epoch": 15.450991089288962,
1588
+ "grad_norm": 52.001590728759766,
1589
+ "learning_rate": 2.2740524781341106e-05,
1590
+ "loss": 0.6969,
1591
+ "step": 5300
1592
+ },
1593
+ {
1594
+ "epoch": 15.450991089288962,
1595
+ "eval_loss": 0.6796217560768127,
1596
+ "eval_runtime": 116.7294,
1597
+ "eval_samples_per_second": 41.343,
1598
+ "eval_steps_per_second": 20.672,
1599
+ "step": 5300
1600
+ },
1601
+ {
1602
+ "epoch": 15.596472085833788,
1603
+ "grad_norm": 64.73352813720703,
1604
+ "learning_rate": 2.2011661807580177e-05,
1605
+ "loss": 0.6955,
1606
+ "step": 5350
1607
+ },
1608
+ {
1609
+ "epoch": 15.596472085833788,
1610
+ "eval_loss": 0.6759679317474365,
1611
+ "eval_runtime": 116.3825,
1612
+ "eval_samples_per_second": 41.467,
1613
+ "eval_steps_per_second": 20.733,
1614
+ "step": 5350
1615
+ },
1616
+ {
1617
+ "epoch": 15.741953082378615,
1618
+ "grad_norm": 44.738739013671875,
1619
+ "learning_rate": 2.1282798833819244e-05,
1620
+ "loss": 0.6985,
1621
+ "step": 5400
1622
+ },
1623
+ {
1624
+ "epoch": 15.741953082378615,
1625
+ "eval_loss": 0.6740979552268982,
1626
+ "eval_runtime": 116.6554,
1627
+ "eval_samples_per_second": 41.37,
1628
+ "eval_steps_per_second": 20.685,
1629
+ "step": 5400
1630
+ },
1631
+ {
1632
+ "epoch": 15.887434078923441,
1633
+ "grad_norm": 97.4302749633789,
1634
+ "learning_rate": 2.055393586005831e-05,
1635
+ "loss": 0.6959,
1636
+ "step": 5450
1637
+ },
1638
+ {
1639
+ "epoch": 15.887434078923441,
1640
+ "eval_loss": 0.6725562810897827,
1641
+ "eval_runtime": 116.2903,
1642
+ "eval_samples_per_second": 41.5,
1643
+ "eval_steps_per_second": 20.75,
1644
+ "step": 5450
1645
  }
1646
  ],
1647
  "logging_steps": 50,
 
1661
  "attributes": {}
1662
  }
1663
  },
1664
+ "total_flos": 1.42943403473109e+18,
1665
  "train_batch_size": 2,
1666
  "trial_name": null,
1667
  "trial_params": null