CocoRoF commited on
Commit
c97d175
·
verified ·
1 Parent(s): 31d40d6

Training in progress, step 2500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8691ff71b090554b749389b543d86609cfacdab476bf9bcda11ba39222817d0d
3
  size 737580392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d26418e85327bf4072ba76aa01dde18a1716eaa86660c26fb299ceaf6d71c5cd
3
  size 737580392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25031bd9459b2ecca16cfe1a60ba2badffd9564c789d995f26a7db2a1e4a8e67
3
  size 1475248442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6df5d87deb87ff09bd2a9c94e587bd25dd1604f3de6ca09e11fd8c4b22cb9b3a
3
  size 1475248442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:835f869ea325fd6edf27b48b589309fb66641cb92b45f2fc13d1bb6e8814106c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c733f603eddfbf8aea2db46dfb96d2d44052ea3c8d772ba82a9011002700581
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db7eaec1ca57d40804cf0ff2a2ceba3277ffbb4ac8dc6548b227514adb077117
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05888e792c6fd0be576ebc92f377eec054b707510467523fa4c87d94e18c0540
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9372071227741331,
5
  "eval_steps": 2.0,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1407,6 +1407,356 @@
1407
  "learning_rate": 2.8828491096532332e-05,
1408
  "loss": 0.3261,
1409
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1410
  }
1411
  ],
1412
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1715089034676662,
5
  "eval_steps": 2.0,
6
+ "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1407
  "learning_rate": 2.8828491096532332e-05,
1408
  "loss": 0.3261,
1409
  "step": 2000
1410
+ },
1411
+ {
1412
+ "epoch": 0.9418931583880038,
1413
+ "grad_norm": 1.3600877523422241,
1414
+ "learning_rate": 2.8822633552014998e-05,
1415
+ "loss": 0.2581,
1416
+ "step": 2010
1417
+ },
1418
+ {
1419
+ "epoch": 0.9465791940018744,
1420
+ "grad_norm": 2.0142452716827393,
1421
+ "learning_rate": 2.8816776007497657e-05,
1422
+ "loss": 0.2926,
1423
+ "step": 2020
1424
+ },
1425
+ {
1426
+ "epoch": 0.9512652296157451,
1427
+ "grad_norm": 2.2205429077148438,
1428
+ "learning_rate": 2.881091846298032e-05,
1429
+ "loss": 0.2978,
1430
+ "step": 2030
1431
+ },
1432
+ {
1433
+ "epoch": 0.9559512652296157,
1434
+ "grad_norm": 1.5943437814712524,
1435
+ "learning_rate": 2.880506091846298e-05,
1436
+ "loss": 0.2801,
1437
+ "step": 2040
1438
+ },
1439
+ {
1440
+ "epoch": 0.9606373008434864,
1441
+ "grad_norm": 1.8343549966812134,
1442
+ "learning_rate": 2.8799203373945644e-05,
1443
+ "loss": 0.3144,
1444
+ "step": 2050
1445
+ },
1446
+ {
1447
+ "epoch": 0.9653233364573571,
1448
+ "grad_norm": 2.438775062561035,
1449
+ "learning_rate": 2.8793345829428303e-05,
1450
+ "loss": 0.3031,
1451
+ "step": 2060
1452
+ },
1453
+ {
1454
+ "epoch": 0.9700093720712277,
1455
+ "grad_norm": 1.6122407913208008,
1456
+ "learning_rate": 2.8787488284910966e-05,
1457
+ "loss": 0.3102,
1458
+ "step": 2070
1459
+ },
1460
+ {
1461
+ "epoch": 0.9746954076850984,
1462
+ "grad_norm": 1.8684320449829102,
1463
+ "learning_rate": 2.8781630740393625e-05,
1464
+ "loss": 0.3064,
1465
+ "step": 2080
1466
+ },
1467
+ {
1468
+ "epoch": 0.979381443298969,
1469
+ "grad_norm": 1.4418230056762695,
1470
+ "learning_rate": 2.877577319587629e-05,
1471
+ "loss": 0.2812,
1472
+ "step": 2090
1473
+ },
1474
+ {
1475
+ "epoch": 0.9840674789128397,
1476
+ "grad_norm": 1.684371829032898,
1477
+ "learning_rate": 2.8769915651358953e-05,
1478
+ "loss": 0.2871,
1479
+ "step": 2100
1480
+ },
1481
+ {
1482
+ "epoch": 0.9887535145267105,
1483
+ "grad_norm": 1.8952980041503906,
1484
+ "learning_rate": 2.8764058106841612e-05,
1485
+ "loss": 0.3098,
1486
+ "step": 2110
1487
+ },
1488
+ {
1489
+ "epoch": 0.993439550140581,
1490
+ "grad_norm": 1.9636595249176025,
1491
+ "learning_rate": 2.8758200562324274e-05,
1492
+ "loss": 0.2493,
1493
+ "step": 2120
1494
+ },
1495
+ {
1496
+ "epoch": 0.9981255857544518,
1497
+ "grad_norm": 1.9494707584381104,
1498
+ "learning_rate": 2.8752343017806937e-05,
1499
+ "loss": 0.304,
1500
+ "step": 2130
1501
+ },
1502
+ {
1503
+ "epoch": 1.0028116213683225,
1504
+ "grad_norm": 1.5540398359298706,
1505
+ "learning_rate": 2.87464854732896e-05,
1506
+ "loss": 0.2886,
1507
+ "step": 2140
1508
+ },
1509
+ {
1510
+ "epoch": 1.007497656982193,
1511
+ "grad_norm": 1.4713643789291382,
1512
+ "learning_rate": 2.8740627928772258e-05,
1513
+ "loss": 0.2194,
1514
+ "step": 2150
1515
+ },
1516
+ {
1517
+ "epoch": 1.0121836925960637,
1518
+ "grad_norm": 1.5880275964736938,
1519
+ "learning_rate": 2.8734770384254924e-05,
1520
+ "loss": 0.208,
1521
+ "step": 2160
1522
+ },
1523
+ {
1524
+ "epoch": 1.0168697282099344,
1525
+ "grad_norm": 1.8418326377868652,
1526
+ "learning_rate": 2.8728912839737583e-05,
1527
+ "loss": 0.2111,
1528
+ "step": 2170
1529
+ },
1530
+ {
1531
+ "epoch": 1.021555763823805,
1532
+ "grad_norm": 1.6184002161026,
1533
+ "learning_rate": 2.8723055295220245e-05,
1534
+ "loss": 0.223,
1535
+ "step": 2180
1536
+ },
1537
+ {
1538
+ "epoch": 1.0262417994376758,
1539
+ "grad_norm": 1.714209794998169,
1540
+ "learning_rate": 2.8717197750702904e-05,
1541
+ "loss": 0.2069,
1542
+ "step": 2190
1543
+ },
1544
+ {
1545
+ "epoch": 1.0309278350515463,
1546
+ "grad_norm": 1.788393497467041,
1547
+ "learning_rate": 2.871134020618557e-05,
1548
+ "loss": 0.1906,
1549
+ "step": 2200
1550
+ },
1551
+ {
1552
+ "epoch": 1.035613870665417,
1553
+ "grad_norm": 1.3916633129119873,
1554
+ "learning_rate": 2.870548266166823e-05,
1555
+ "loss": 0.2254,
1556
+ "step": 2210
1557
+ },
1558
+ {
1559
+ "epoch": 1.0402999062792877,
1560
+ "grad_norm": 1.4615910053253174,
1561
+ "learning_rate": 2.869962511715089e-05,
1562
+ "loss": 0.2189,
1563
+ "step": 2220
1564
+ },
1565
+ {
1566
+ "epoch": 1.0449859418931584,
1567
+ "grad_norm": 1.9119755029678345,
1568
+ "learning_rate": 2.869376757263355e-05,
1569
+ "loss": 0.2505,
1570
+ "step": 2230
1571
+ },
1572
+ {
1573
+ "epoch": 1.0496719775070291,
1574
+ "grad_norm": 1.8039140701293945,
1575
+ "learning_rate": 2.8687910028116216e-05,
1576
+ "loss": 0.2127,
1577
+ "step": 2240
1578
+ },
1579
+ {
1580
+ "epoch": 1.0543580131208996,
1581
+ "grad_norm": 1.394958734512329,
1582
+ "learning_rate": 2.8682052483598875e-05,
1583
+ "loss": 0.2105,
1584
+ "step": 2250
1585
+ },
1586
+ {
1587
+ "epoch": 1.0590440487347703,
1588
+ "grad_norm": 1.660672903060913,
1589
+ "learning_rate": 2.8676194939081537e-05,
1590
+ "loss": 0.2337,
1591
+ "step": 2260
1592
+ },
1593
+ {
1594
+ "epoch": 1.063730084348641,
1595
+ "grad_norm": 1.7532225847244263,
1596
+ "learning_rate": 2.8670337394564196e-05,
1597
+ "loss": 0.2435,
1598
+ "step": 2270
1599
+ },
1600
+ {
1601
+ "epoch": 1.0684161199625117,
1602
+ "grad_norm": 1.5861754417419434,
1603
+ "learning_rate": 2.8664479850046862e-05,
1604
+ "loss": 0.2258,
1605
+ "step": 2280
1606
+ },
1607
+ {
1608
+ "epoch": 1.0731021555763824,
1609
+ "grad_norm": 2.068645477294922,
1610
+ "learning_rate": 2.865862230552952e-05,
1611
+ "loss": 0.2332,
1612
+ "step": 2290
1613
+ },
1614
+ {
1615
+ "epoch": 1.077788191190253,
1616
+ "grad_norm": 1.5656896829605103,
1617
+ "learning_rate": 2.8652764761012184e-05,
1618
+ "loss": 0.226,
1619
+ "step": 2300
1620
+ },
1621
+ {
1622
+ "epoch": 1.0824742268041236,
1623
+ "grad_norm": 1.4442079067230225,
1624
+ "learning_rate": 2.8646907216494846e-05,
1625
+ "loss": 0.2371,
1626
+ "step": 2310
1627
+ },
1628
+ {
1629
+ "epoch": 1.0871602624179943,
1630
+ "grad_norm": 1.9563679695129395,
1631
+ "learning_rate": 2.864104967197751e-05,
1632
+ "loss": 0.1806,
1633
+ "step": 2320
1634
+ },
1635
+ {
1636
+ "epoch": 1.091846298031865,
1637
+ "grad_norm": 2.3428823947906494,
1638
+ "learning_rate": 2.863519212746017e-05,
1639
+ "loss": 0.2078,
1640
+ "step": 2330
1641
+ },
1642
+ {
1643
+ "epoch": 1.0965323336457358,
1644
+ "grad_norm": 1.921542763710022,
1645
+ "learning_rate": 2.862933458294283e-05,
1646
+ "loss": 0.1881,
1647
+ "step": 2340
1648
+ },
1649
+ {
1650
+ "epoch": 1.1012183692596063,
1651
+ "grad_norm": 2.0102312564849854,
1652
+ "learning_rate": 2.8623477038425496e-05,
1653
+ "loss": 0.2039,
1654
+ "step": 2350
1655
+ },
1656
+ {
1657
+ "epoch": 1.105904404873477,
1658
+ "grad_norm": 2.0634419918060303,
1659
+ "learning_rate": 2.8617619493908155e-05,
1660
+ "loss": 0.2375,
1661
+ "step": 2360
1662
+ },
1663
+ {
1664
+ "epoch": 1.1105904404873477,
1665
+ "grad_norm": 1.7795848846435547,
1666
+ "learning_rate": 2.8611761949390817e-05,
1667
+ "loss": 0.1827,
1668
+ "step": 2370
1669
+ },
1670
+ {
1671
+ "epoch": 1.1152764761012184,
1672
+ "grad_norm": 1.6190413236618042,
1673
+ "learning_rate": 2.8605904404873476e-05,
1674
+ "loss": 0.2205,
1675
+ "step": 2380
1676
+ },
1677
+ {
1678
+ "epoch": 1.119962511715089,
1679
+ "grad_norm": 1.917844295501709,
1680
+ "learning_rate": 2.8600046860356142e-05,
1681
+ "loss": 0.2144,
1682
+ "step": 2390
1683
+ },
1684
+ {
1685
+ "epoch": 1.1246485473289598,
1686
+ "grad_norm": 1.9821206331253052,
1687
+ "learning_rate": 2.85941893158388e-05,
1688
+ "loss": 0.2305,
1689
+ "step": 2400
1690
+ },
1691
+ {
1692
+ "epoch": 1.1293345829428303,
1693
+ "grad_norm": 1.897702932357788,
1694
+ "learning_rate": 2.8588331771321463e-05,
1695
+ "loss": 0.2189,
1696
+ "step": 2410
1697
+ },
1698
+ {
1699
+ "epoch": 1.134020618556701,
1700
+ "grad_norm": 1.575378179550171,
1701
+ "learning_rate": 2.8582474226804122e-05,
1702
+ "loss": 0.2072,
1703
+ "step": 2420
1704
+ },
1705
+ {
1706
+ "epoch": 1.1387066541705717,
1707
+ "grad_norm": 1.5781910419464111,
1708
+ "learning_rate": 2.8576616682286788e-05,
1709
+ "loss": 0.2167,
1710
+ "step": 2430
1711
+ },
1712
+ {
1713
+ "epoch": 1.1433926897844424,
1714
+ "grad_norm": 1.253761887550354,
1715
+ "learning_rate": 2.8570759137769447e-05,
1716
+ "loss": 0.2054,
1717
+ "step": 2440
1718
+ },
1719
+ {
1720
+ "epoch": 1.148078725398313,
1721
+ "grad_norm": 1.4306617975234985,
1722
+ "learning_rate": 2.856490159325211e-05,
1723
+ "loss": 0.2507,
1724
+ "step": 2450
1725
+ },
1726
+ {
1727
+ "epoch": 1.1527647610121836,
1728
+ "grad_norm": 1.815042495727539,
1729
+ "learning_rate": 2.855904404873477e-05,
1730
+ "loss": 0.1929,
1731
+ "step": 2460
1732
+ },
1733
+ {
1734
+ "epoch": 1.1574507966260543,
1735
+ "grad_norm": 1.771567940711975,
1736
+ "learning_rate": 2.8553186504217434e-05,
1737
+ "loss": 0.2143,
1738
+ "step": 2470
1739
+ },
1740
+ {
1741
+ "epoch": 1.162136832239925,
1742
+ "grad_norm": 1.0400657653808594,
1743
+ "learning_rate": 2.8547328959700093e-05,
1744
+ "loss": 0.223,
1745
+ "step": 2480
1746
+ },
1747
+ {
1748
+ "epoch": 1.1668228678537957,
1749
+ "grad_norm": 1.6412447690963745,
1750
+ "learning_rate": 2.8541471415182755e-05,
1751
+ "loss": 0.1909,
1752
+ "step": 2490
1753
+ },
1754
+ {
1755
+ "epoch": 1.1715089034676662,
1756
+ "grad_norm": 1.7133567333221436,
1757
+ "learning_rate": 2.8535613870665418e-05,
1758
+ "loss": 0.2286,
1759
+ "step": 2500
1760
  }
1761
  ],
1762
  "logging_steps": 10,