CocoRoF commited on
Commit
7d38971
·
verified ·
1 Parent(s): 512003a

Training in progress, step 1192, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa23844813712215c3e317ac601a23c61dbabb2025bb8d9d1761f202dc9648f7
3
  size 791869518
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f41dbd134eb35770af086a2fc18e0d281fd4b1a6f9d1aabf53883de70234ba83
3
  size 791869518
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82665c62b90edf301b70ae4bd2b8ad3351c4dcf4307ab0abb54fcaa59976966d
3
  size 2375752250
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4adf7ed35f9ee07ab90d7682190f703441a80f34395a26cc75716bb6f77e1d5c
3
  size 2375752250
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c18b4e2e89d973663b034f196f33ea51a1543f40916f5c6695e7c3b25fb20c4e
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:378851ced5b160e1084be88052e6491387296da871a84fe5b4200346a2ca2994
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8389041814130292,
5
  "eval_steps": 500,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1423,6 +1423,272 @@
1423
  "eval_samples_per_second": 598.662,
1424
  "eval_steps_per_second": 18.712,
1425
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1426
  }
1427
  ],
1428
  "logging_steps": 5,
@@ -1437,12 +1703,12 @@
1437
  "should_evaluate": false,
1438
  "should_log": false,
1439
  "should_save": true,
1440
- "should_training_stop": false
1441
  },
1442
  "attributes": {}
1443
  }
1444
  },
1445
- "total_flos": 4.332357992788787e+18,
1446
  "train_batch_size": 4,
1447
  "trial_name": null,
1448
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9999737842443308,
5
  "eval_steps": 500,
6
+ "global_step": 1192,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1423
  "eval_samples_per_second": 598.662,
1424
  "eval_steps_per_second": 18.712,
1425
  "step": 1000
1426
+ },
1427
+ {
1428
+ "epoch": 0.8430987023200944,
1429
+ "grad_norm": 45.65625,
1430
+ "learning_rate": 1.7444029850746267e-07,
1431
+ "loss": 147.9916,
1432
+ "step": 1005
1433
+ },
1434
+ {
1435
+ "epoch": 0.8472932232271595,
1436
+ "grad_norm": 48.625,
1437
+ "learning_rate": 1.6977611940298506e-07,
1438
+ "loss": 148.1211,
1439
+ "step": 1010
1440
+ },
1441
+ {
1442
+ "epoch": 0.8514877441342247,
1443
+ "grad_norm": 43.5,
1444
+ "learning_rate": 1.6511194029850746e-07,
1445
+ "loss": 148.0889,
1446
+ "step": 1015
1447
+ },
1448
+ {
1449
+ "epoch": 0.8556822650412899,
1450
+ "grad_norm": 41.84375,
1451
+ "learning_rate": 1.6044776119402983e-07,
1452
+ "loss": 147.4216,
1453
+ "step": 1020
1454
+ },
1455
+ {
1456
+ "epoch": 0.8598767859483549,
1457
+ "grad_norm": 43.28125,
1458
+ "learning_rate": 1.5578358208955222e-07,
1459
+ "loss": 147.1561,
1460
+ "step": 1025
1461
+ },
1462
+ {
1463
+ "epoch": 0.8640713068554201,
1464
+ "grad_norm": 50.4375,
1465
+ "learning_rate": 1.5111940298507462e-07,
1466
+ "loss": 146.7463,
1467
+ "step": 1030
1468
+ },
1469
+ {
1470
+ "epoch": 0.8682658277624853,
1471
+ "grad_norm": 42.84375,
1472
+ "learning_rate": 1.46455223880597e-07,
1473
+ "loss": 146.9864,
1474
+ "step": 1035
1475
+ },
1476
+ {
1477
+ "epoch": 0.8724603486695504,
1478
+ "grad_norm": 44.75,
1479
+ "learning_rate": 1.4179104477611938e-07,
1480
+ "loss": 147.2126,
1481
+ "step": 1040
1482
+ },
1483
+ {
1484
+ "epoch": 0.8766548695766155,
1485
+ "grad_norm": 42.5625,
1486
+ "learning_rate": 1.3712686567164177e-07,
1487
+ "loss": 146.4982,
1488
+ "step": 1045
1489
+ },
1490
+ {
1491
+ "epoch": 0.8808493904836807,
1492
+ "grad_norm": 40.65625,
1493
+ "learning_rate": 1.3246268656716417e-07,
1494
+ "loss": 146.3903,
1495
+ "step": 1050
1496
+ },
1497
+ {
1498
+ "epoch": 0.8850439113907458,
1499
+ "grad_norm": 42.25,
1500
+ "learning_rate": 1.2779850746268656e-07,
1501
+ "loss": 146.2571,
1502
+ "step": 1055
1503
+ },
1504
+ {
1505
+ "epoch": 0.889238432297811,
1506
+ "grad_norm": 43.96875,
1507
+ "learning_rate": 1.2313432835820893e-07,
1508
+ "loss": 146.2127,
1509
+ "step": 1060
1510
+ },
1511
+ {
1512
+ "epoch": 0.8934329532048761,
1513
+ "grad_norm": 44.34375,
1514
+ "learning_rate": 1.1847014925373134e-07,
1515
+ "loss": 146.0634,
1516
+ "step": 1065
1517
+ },
1518
+ {
1519
+ "epoch": 0.8976274741119413,
1520
+ "grad_norm": 42.84375,
1521
+ "learning_rate": 1.1380597014925372e-07,
1522
+ "loss": 146.5255,
1523
+ "step": 1070
1524
+ },
1525
+ {
1526
+ "epoch": 0.9018219950190064,
1527
+ "grad_norm": 41.78125,
1528
+ "learning_rate": 1.0914179104477612e-07,
1529
+ "loss": 145.7911,
1530
+ "step": 1075
1531
+ },
1532
+ {
1533
+ "epoch": 0.9060165159260716,
1534
+ "grad_norm": 43.90625,
1535
+ "learning_rate": 1.044776119402985e-07,
1536
+ "loss": 145.654,
1537
+ "step": 1080
1538
+ },
1539
+ {
1540
+ "epoch": 0.9102110368331368,
1541
+ "grad_norm": 43.96875,
1542
+ "learning_rate": 9.981343283582089e-08,
1543
+ "loss": 146.0228,
1544
+ "step": 1085
1545
+ },
1546
+ {
1547
+ "epoch": 0.9144055577402018,
1548
+ "grad_norm": 42.75,
1549
+ "learning_rate": 9.514925373134327e-08,
1550
+ "loss": 146.2392,
1551
+ "step": 1090
1552
+ },
1553
+ {
1554
+ "epoch": 0.918600078647267,
1555
+ "grad_norm": 42.78125,
1556
+ "learning_rate": 9.048507462686567e-08,
1557
+ "loss": 145.7595,
1558
+ "step": 1095
1559
+ },
1560
+ {
1561
+ "epoch": 0.9227945995543322,
1562
+ "grad_norm": 46.65625,
1563
+ "learning_rate": 8.582089552238805e-08,
1564
+ "loss": 145.3029,
1565
+ "step": 1100
1566
+ },
1567
+ {
1568
+ "epoch": 0.9269891204613973,
1569
+ "grad_norm": 39.03125,
1570
+ "learning_rate": 8.115671641791044e-08,
1571
+ "loss": 145.1068,
1572
+ "step": 1105
1573
+ },
1574
+ {
1575
+ "epoch": 0.9311836413684624,
1576
+ "grad_norm": 40.5,
1577
+ "learning_rate": 7.649253731343283e-08,
1578
+ "loss": 144.9648,
1579
+ "step": 1110
1580
+ },
1581
+ {
1582
+ "epoch": 0.9353781622755276,
1583
+ "grad_norm": 39.3125,
1584
+ "learning_rate": 7.182835820895522e-08,
1585
+ "loss": 145.055,
1586
+ "step": 1115
1587
+ },
1588
+ {
1589
+ "epoch": 0.9395726831825927,
1590
+ "grad_norm": 43.53125,
1591
+ "learning_rate": 6.71641791044776e-08,
1592
+ "loss": 144.7464,
1593
+ "step": 1120
1594
+ },
1595
+ {
1596
+ "epoch": 0.9437672040896579,
1597
+ "grad_norm": 40.0625,
1598
+ "learning_rate": 6.25e-08,
1599
+ "loss": 144.7482,
1600
+ "step": 1125
1601
+ },
1602
+ {
1603
+ "epoch": 0.947961724996723,
1604
+ "grad_norm": 40.3125,
1605
+ "learning_rate": 5.7835820895522385e-08,
1606
+ "loss": 144.9773,
1607
+ "step": 1130
1608
+ },
1609
+ {
1610
+ "epoch": 0.9521562459037882,
1611
+ "grad_norm": 38.34375,
1612
+ "learning_rate": 5.3171641791044774e-08,
1613
+ "loss": 144.8226,
1614
+ "step": 1135
1615
+ },
1616
+ {
1617
+ "epoch": 0.9563507668108533,
1618
+ "grad_norm": 38.28125,
1619
+ "learning_rate": 4.850746268656716e-08,
1620
+ "loss": 144.0508,
1621
+ "step": 1140
1622
+ },
1623
+ {
1624
+ "epoch": 0.9605452877179185,
1625
+ "grad_norm": 36.8125,
1626
+ "learning_rate": 4.384328358208955e-08,
1627
+ "loss": 144.5738,
1628
+ "step": 1145
1629
+ },
1630
+ {
1631
+ "epoch": 0.9647398086249837,
1632
+ "grad_norm": 35.8125,
1633
+ "learning_rate": 3.917910447761194e-08,
1634
+ "loss": 143.7115,
1635
+ "step": 1150
1636
+ },
1637
+ {
1638
+ "epoch": 0.9689343295320487,
1639
+ "grad_norm": 37.375,
1640
+ "learning_rate": 3.4514925373134326e-08,
1641
+ "loss": 144.4058,
1642
+ "step": 1155
1643
+ },
1644
+ {
1645
+ "epoch": 0.9731288504391139,
1646
+ "grad_norm": 35.53125,
1647
+ "learning_rate": 2.9850746268656714e-08,
1648
+ "loss": 143.9776,
1649
+ "step": 1160
1650
+ },
1651
+ {
1652
+ "epoch": 0.9773233713461791,
1653
+ "grad_norm": 34.96875,
1654
+ "learning_rate": 2.5186567164179103e-08,
1655
+ "loss": 143.9102,
1656
+ "step": 1165
1657
+ },
1658
+ {
1659
+ "epoch": 0.9815178922532442,
1660
+ "grad_norm": 34.875,
1661
+ "learning_rate": 2.052238805970149e-08,
1662
+ "loss": 144.5179,
1663
+ "step": 1170
1664
+ },
1665
+ {
1666
+ "epoch": 0.9857124131603093,
1667
+ "grad_norm": 34.6875,
1668
+ "learning_rate": 1.5858208955223882e-08,
1669
+ "loss": 144.5262,
1670
+ "step": 1175
1671
+ },
1672
+ {
1673
+ "epoch": 0.9899069340673745,
1674
+ "grad_norm": 33.53125,
1675
+ "learning_rate": 1.1194029850746267e-08,
1676
+ "loss": 144.4803,
1677
+ "step": 1180
1678
+ },
1679
+ {
1680
+ "epoch": 0.9941014549744397,
1681
+ "grad_norm": 32.90625,
1682
+ "learning_rate": 6.529850746268656e-09,
1683
+ "loss": 144.1151,
1684
+ "step": 1185
1685
+ },
1686
+ {
1687
+ "epoch": 0.9982959758815048,
1688
+ "grad_norm": 32.25,
1689
+ "learning_rate": 1.8656716417910446e-09,
1690
+ "loss": 143.4677,
1691
+ "step": 1190
1692
  }
1693
  ],
1694
  "logging_steps": 5,
 
1703
  "should_evaluate": false,
1704
  "should_log": false,
1705
  "should_save": true,
1706
+ "should_training_stop": true
1707
  },
1708
  "attributes": {}
1709
  }
1710
  },
1711
+ "total_flos": 5.16417072729686e+18,
1712
  "train_batch_size": 4,
1713
  "trial_name": null,
1714
  "trial_params": null