ljcamargo commited on
Commit
4bb642e
·
verified ·
1 Parent(s): 79532a3

Training in progress, step 2400, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:801632b13dd650035b8637c9af213bc74194a9ef5cf8b6b65c2a509a34782c30
3
  size 3237829088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c8c03bddde1d45b42f156ee9380731c978903eeb514446180e27d130995337d
3
  size 3237829088
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2776246849e9530b854f7a1af3e71fc651a2697598de16a64d897fa8530e760
3
  size 2062272049
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a2d8576045f18ebc0d44a01c8bb87c6bcc68dc93cada4a8ed13a6805a28e50a
3
  size 2062272049
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6725ac8cfcdd5ed2e94a6dc5c8d88f80e593c5d3e8324e00ee31281fa51f86e
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdd2d9b8a329c8bdf157e8302d4758961f9a282c3f0127e29e492f0c374d2cc5
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f13dd54935d4d1876d05824ed5aab8e787b691f2aec583b5a7e328fd2bead633
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a82daf79aef8e8b5ecd74ff5d2377b7a09a1c4d4504ecc0c2a12006214be596b
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:207e07bf53e1f3c020ec2dfd378c4461a481edafdba7a64484be4547457af2b3
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f95bb9c4b14269e2ef89bd678ab3c3d4b5f143d243a24d6ece8108f7e85154f8
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.6026689625484287,
6
  "eval_steps": 300,
7
- "global_step": 2100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1478,6 +1478,216 @@
1478
  "learning_rate": 7.108156009412176e-05,
1479
  "loss": 0.7569,
1480
  "step": 2100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1481
  }
1482
  ],
1483
  "logging_steps": 10,
@@ -1497,7 +1707,7 @@
1497
  "attributes": {}
1498
  }
1499
  },
1500
- "total_flos": 8.5929538093056e+19,
1501
  "train_batch_size": 6,
1502
  "trial_name": null,
1503
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.6887645286267757,
6
  "eval_steps": 300,
7
+ "global_step": 2400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1478
  "learning_rate": 7.108156009412176e-05,
1479
  "loss": 0.7569,
1480
  "step": 2100
1481
+ },
1482
+ {
1483
+ "epoch": 0.6055388147510403,
1484
+ "grad_norm": 3.5824501514434814,
1485
+ "learning_rate": 7.02024198269733e-05,
1486
+ "loss": 0.7963,
1487
+ "step": 2110
1488
+ },
1489
+ {
1490
+ "epoch": 0.6084086669536519,
1491
+ "grad_norm": 8.07539176940918,
1492
+ "learning_rate": 6.932579980015618e-05,
1493
+ "loss": 0.8183,
1494
+ "step": 2120
1495
+ },
1496
+ {
1497
+ "epoch": 0.6112785191562634,
1498
+ "grad_norm": 5.9698615074157715,
1499
+ "learning_rate": 6.845177415704484e-05,
1500
+ "loss": 0.749,
1501
+ "step": 2130
1502
+ },
1503
+ {
1504
+ "epoch": 0.614148371358875,
1505
+ "grad_norm": 4.034762859344482,
1506
+ "learning_rate": 6.758041682158431e-05,
1507
+ "loss": 0.7853,
1508
+ "step": 2140
1509
+ },
1510
+ {
1511
+ "epoch": 0.6170182235614866,
1512
+ "grad_norm": 8.13531494140625,
1513
+ "learning_rate": 6.671180149203751e-05,
1514
+ "loss": 0.7871,
1515
+ "step": 2150
1516
+ },
1517
+ {
1518
+ "epoch": 0.6198880757640981,
1519
+ "grad_norm": 5.809640884399414,
1520
+ "learning_rate": 6.584600163475222e-05,
1521
+ "loss": 0.8037,
1522
+ "step": 2160
1523
+ },
1524
+ {
1525
+ "epoch": 0.6227579279667097,
1526
+ "grad_norm": 5.849427223205566,
1527
+ "learning_rate": 6.498309047794713e-05,
1528
+ "loss": 0.8076,
1529
+ "step": 2170
1530
+ },
1531
+ {
1532
+ "epoch": 0.6256277801693213,
1533
+ "grad_norm": 4.466967582702637,
1534
+ "learning_rate": 6.412314100551854e-05,
1535
+ "loss": 0.7863,
1536
+ "step": 2180
1537
+ },
1538
+ {
1539
+ "epoch": 0.6284976323719328,
1540
+ "grad_norm": 4.934723377227783,
1541
+ "learning_rate": 6.326622595086722e-05,
1542
+ "loss": 0.7747,
1543
+ "step": 2190
1544
+ },
1545
+ {
1546
+ "epoch": 0.6313674845745444,
1547
+ "grad_norm": 4.067635536193848,
1548
+ "learning_rate": 6.241241779074705e-05,
1549
+ "loss": 0.7804,
1550
+ "step": 2200
1551
+ },
1552
+ {
1553
+ "epoch": 0.634237336777156,
1554
+ "grad_norm": 4.629720687866211,
1555
+ "learning_rate": 6.156178873913468e-05,
1556
+ "loss": 0.7672,
1557
+ "step": 2210
1558
+ },
1559
+ {
1560
+ "epoch": 0.6371071889797676,
1561
+ "grad_norm": 3.9992971420288086,
1562
+ "learning_rate": 6.071441074112194e-05,
1563
+ "loss": 0.7856,
1564
+ "step": 2220
1565
+ },
1566
+ {
1567
+ "epoch": 0.6399770411823791,
1568
+ "grad_norm": 6.1507062911987305,
1569
+ "learning_rate": 5.9870355466830885e-05,
1570
+ "loss": 0.752,
1571
+ "step": 2230
1572
+ },
1573
+ {
1574
+ "epoch": 0.6428468933849907,
1575
+ "grad_norm": 4.305118083953857,
1576
+ "learning_rate": 5.902969430535186e-05,
1577
+ "loss": 0.7506,
1578
+ "step": 2240
1579
+ },
1580
+ {
1581
+ "epoch": 0.6457167455876023,
1582
+ "grad_norm": 3.7307469844818115,
1583
+ "learning_rate": 5.819249835870566e-05,
1584
+ "loss": 0.7744,
1585
+ "step": 2250
1586
+ },
1587
+ {
1588
+ "epoch": 0.6485865977902138,
1589
+ "grad_norm": 5.391602516174316,
1590
+ "learning_rate": 5.7358838435829664e-05,
1591
+ "loss": 0.8067,
1592
+ "step": 2260
1593
+ },
1594
+ {
1595
+ "epoch": 0.6514564499928254,
1596
+ "grad_norm": 4.221368789672852,
1597
+ "learning_rate": 5.6528785046589115e-05,
1598
+ "loss": 0.8257,
1599
+ "step": 2270
1600
+ },
1601
+ {
1602
+ "epoch": 0.654326302195437,
1603
+ "grad_norm": 5.274345397949219,
1604
+ "learning_rate": 5.570240839581323e-05,
1605
+ "loss": 0.7638,
1606
+ "step": 2280
1607
+ },
1608
+ {
1609
+ "epoch": 0.6571961543980485,
1610
+ "grad_norm": 4.528804779052734,
1611
+ "learning_rate": 5.487977837735756e-05,
1612
+ "loss": 0.7805,
1613
+ "step": 2290
1614
+ },
1615
+ {
1616
+ "epoch": 0.6600660066006601,
1617
+ "grad_norm": 4.387100696563721,
1618
+ "learning_rate": 5.406096456819234e-05,
1619
+ "loss": 0.7811,
1620
+ "step": 2300
1621
+ },
1622
+ {
1623
+ "epoch": 0.6629358588032717,
1624
+ "grad_norm": 5.64663028717041,
1625
+ "learning_rate": 5.324603622251797e-05,
1626
+ "loss": 0.771,
1627
+ "step": 2310
1628
+ },
1629
+ {
1630
+ "epoch": 0.6658057110058831,
1631
+ "grad_norm": 4.328652381896973,
1632
+ "learning_rate": 5.243506226590722e-05,
1633
+ "loss": 0.7711,
1634
+ "step": 2320
1635
+ },
1636
+ {
1637
+ "epoch": 0.6686755632084947,
1638
+ "grad_norm": 4.763848781585693,
1639
+ "learning_rate": 5.162811128947602e-05,
1640
+ "loss": 0.7849,
1641
+ "step": 2330
1642
+ },
1643
+ {
1644
+ "epoch": 0.6715454154111064,
1645
+ "grad_norm": 6.142160892486572,
1646
+ "learning_rate": 5.082525154408173e-05,
1647
+ "loss": 0.7587,
1648
+ "step": 2340
1649
+ },
1650
+ {
1651
+ "epoch": 0.6744152676137178,
1652
+ "grad_norm": 6.3459553718566895,
1653
+ "learning_rate": 5.002655093455086e-05,
1654
+ "loss": 0.7762,
1655
+ "step": 2350
1656
+ },
1657
+ {
1658
+ "epoch": 0.6772851198163294,
1659
+ "grad_norm": 5.520603656768799,
1660
+ "learning_rate": 4.9232077013935606e-05,
1661
+ "loss": 0.7854,
1662
+ "step": 2360
1663
+ },
1664
+ {
1665
+ "epoch": 0.680154972018941,
1666
+ "grad_norm": 3.9489786624908447,
1667
+ "learning_rate": 4.844189697780033e-05,
1668
+ "loss": 0.7599,
1669
+ "step": 2370
1670
+ },
1671
+ {
1672
+ "epoch": 0.6830248242215526,
1673
+ "grad_norm": 5.653624057769775,
1674
+ "learning_rate": 4.765607765853828e-05,
1675
+ "loss": 0.7875,
1676
+ "step": 2380
1677
+ },
1678
+ {
1679
+ "epoch": 0.6858946764241641,
1680
+ "grad_norm": 4.3883957862854,
1681
+ "learning_rate": 4.6874685519718945e-05,
1682
+ "loss": 0.7825,
1683
+ "step": 2390
1684
+ },
1685
+ {
1686
+ "epoch": 0.6887645286267757,
1687
+ "grad_norm": 3.743744134902954,
1688
+ "learning_rate": 4.60977866504668e-05,
1689
+ "loss": 0.7796,
1690
+ "step": 2400
1691
  }
1692
  ],
1693
  "logging_steps": 10,
 
1707
  "attributes": {}
1708
  }
1709
  },
1710
+ "total_flos": 9.8205186392064e+19,
1711
  "train_batch_size": 6,
1712
  "trial_name": null,
1713
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb8fac145ce6d3b844b04932d52e4aba260f48f6c9dc5ba626561ea49a834bfb
3
  size 6033
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e7bdac5d864a20d8b4fc428d3cfbb2f8cb185783eb905886cd482fff0f1081a
3
  size 6033