azherali commited on
Commit
87a73ab
·
verified ·
1 Parent(s): 1f6205b

Training in progress, step 24000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bedaf2b834c17d85aceaca7862c24d85d8d872107727528ad8cce968c65457d
3
  size 3555504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63d8709297afd894af99a163299f58c25d0ecf4c1109fb33c6ca599b695c1163
3
  size 3555504
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b774de3046f6c9826032946233d1806ab5688b30b35bf69f6e2c64418b1af07
3
  size 7141515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c62a5ee613bedcbe24f1782d277ddaa2c1353f7999a9b5d9c7e67060088462a
3
  size 7141515
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2eaf494bdb773a56e55acb93767a950deb04425b06a47268e2a7e6eb9596f87
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:732f3f8948a77bf9ef00b9a71ed21980be5bb4dfbe32b4f3c8a750669102d82c
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d613eca09cb3ed2cbc2c00b1d0538a9bda3f76b75cb69bac8b3cf4fa9b1dda90
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:809d9e108b4a097a87eb7ca47b7d07656e83c40d06f4eb5e0ec99a3edb4019d0
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:524e618937b19fc406beb64b3f4e048c69ed7a9b18cd89552940d0d92765916b
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23fadf64bbbf065d62df9416650b6887ec1c2b6763b291ed80fa5ee507341cf0
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 20000,
3
  "best_metric": 0.9846426496660109,
4
  "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-20000",
5
- "epoch": 0.64,
6
  "eval_steps": 4000,
7
- "global_step": 20000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1468,6 +1468,298 @@
1468
  "eval_samples_per_second": 122.848,
1469
  "eval_steps_per_second": 7.678,
1470
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1471
  }
1472
  ],
1473
  "logging_steps": 100,
@@ -1482,7 +1774,7 @@
1482
  "early_stopping_threshold": 0.0
1483
  },
1484
  "attributes": {
1485
- "early_stopping_patience_counter": 0
1486
  }
1487
  },
1488
  "TrainerControl": {
@@ -1496,7 +1788,7 @@
1496
  "attributes": {}
1497
  }
1498
  },
1499
- "total_flos": 8.492350218891494e+16,
1500
  "train_batch_size": 16,
1501
  "trial_name": null,
1502
  "trial_params": null
 
2
  "best_global_step": 20000,
3
  "best_metric": 0.9846426496660109,
4
  "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-20000",
5
+ "epoch": 0.768,
6
  "eval_steps": 4000,
7
+ "global_step": 24000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1468
  "eval_samples_per_second": 122.848,
1469
  "eval_steps_per_second": 7.678,
1470
  "step": 20000
1471
+ },
1472
+ {
1473
+ "epoch": 0.6432,
1474
+ "grad_norm": 25.919387817382812,
1475
+ "learning_rate": 1.7483274478330657e-05,
1476
+ "loss": 0.0704,
1477
+ "step": 20100
1478
+ },
1479
+ {
1480
+ "epoch": 0.6464,
1481
+ "grad_norm": 0.27127301692962646,
1482
+ "learning_rate": 1.7470433386837882e-05,
1483
+ "loss": 0.0749,
1484
+ "step": 20200
1485
+ },
1486
+ {
1487
+ "epoch": 0.6496,
1488
+ "grad_norm": 0.1059252917766571,
1489
+ "learning_rate": 1.7457592295345103e-05,
1490
+ "loss": 0.0793,
1491
+ "step": 20300
1492
+ },
1493
+ {
1494
+ "epoch": 0.6528,
1495
+ "grad_norm": 0.6019250154495239,
1496
+ "learning_rate": 1.7444751203852328e-05,
1497
+ "loss": 0.059,
1498
+ "step": 20400
1499
+ },
1500
+ {
1501
+ "epoch": 0.656,
1502
+ "grad_norm": 0.28291046619415283,
1503
+ "learning_rate": 1.7431910112359553e-05,
1504
+ "loss": 0.0569,
1505
+ "step": 20500
1506
+ },
1507
+ {
1508
+ "epoch": 0.6592,
1509
+ "grad_norm": 0.15100154280662537,
1510
+ "learning_rate": 1.7419069020866774e-05,
1511
+ "loss": 0.0398,
1512
+ "step": 20600
1513
+ },
1514
+ {
1515
+ "epoch": 0.6624,
1516
+ "grad_norm": 0.017900506034493446,
1517
+ "learning_rate": 1.7406227929374e-05,
1518
+ "loss": 0.0559,
1519
+ "step": 20700
1520
+ },
1521
+ {
1522
+ "epoch": 0.6656,
1523
+ "grad_norm": 0.007751260884106159,
1524
+ "learning_rate": 1.739338683788122e-05,
1525
+ "loss": 0.0621,
1526
+ "step": 20800
1527
+ },
1528
+ {
1529
+ "epoch": 0.6688,
1530
+ "grad_norm": 5.354798793792725,
1531
+ "learning_rate": 1.7380545746388445e-05,
1532
+ "loss": 0.085,
1533
+ "step": 20900
1534
+ },
1535
+ {
1536
+ "epoch": 0.672,
1537
+ "grad_norm": 2.312457799911499,
1538
+ "learning_rate": 1.7367704654895667e-05,
1539
+ "loss": 0.0654,
1540
+ "step": 21000
1541
+ },
1542
+ {
1543
+ "epoch": 0.6752,
1544
+ "grad_norm": 0.10008107125759125,
1545
+ "learning_rate": 1.7354863563402892e-05,
1546
+ "loss": 0.0697,
1547
+ "step": 21100
1548
+ },
1549
+ {
1550
+ "epoch": 0.6784,
1551
+ "grad_norm": 0.008539400063455105,
1552
+ "learning_rate": 1.7342022471910113e-05,
1553
+ "loss": 0.0687,
1554
+ "step": 21200
1555
+ },
1556
+ {
1557
+ "epoch": 0.6816,
1558
+ "grad_norm": 1.0686814785003662,
1559
+ "learning_rate": 1.7329181380417338e-05,
1560
+ "loss": 0.0491,
1561
+ "step": 21300
1562
+ },
1563
+ {
1564
+ "epoch": 0.6848,
1565
+ "grad_norm": 42.248897552490234,
1566
+ "learning_rate": 1.731634028892456e-05,
1567
+ "loss": 0.0464,
1568
+ "step": 21400
1569
+ },
1570
+ {
1571
+ "epoch": 0.688,
1572
+ "grad_norm": 17.70836067199707,
1573
+ "learning_rate": 1.7303499197431784e-05,
1574
+ "loss": 0.109,
1575
+ "step": 21500
1576
+ },
1577
+ {
1578
+ "epoch": 0.6912,
1579
+ "grad_norm": 11.702173233032227,
1580
+ "learning_rate": 1.7290658105939006e-05,
1581
+ "loss": 0.0626,
1582
+ "step": 21600
1583
+ },
1584
+ {
1585
+ "epoch": 0.6944,
1586
+ "grad_norm": 0.15207910537719727,
1587
+ "learning_rate": 1.727781701444623e-05,
1588
+ "loss": 0.0617,
1589
+ "step": 21700
1590
+ },
1591
+ {
1592
+ "epoch": 0.6976,
1593
+ "grad_norm": 0.7698332667350769,
1594
+ "learning_rate": 1.7264975922953452e-05,
1595
+ "loss": 0.0508,
1596
+ "step": 21800
1597
+ },
1598
+ {
1599
+ "epoch": 0.7008,
1600
+ "grad_norm": 0.012268565595149994,
1601
+ "learning_rate": 1.7252134831460677e-05,
1602
+ "loss": 0.0518,
1603
+ "step": 21900
1604
+ },
1605
+ {
1606
+ "epoch": 0.704,
1607
+ "grad_norm": 0.07914119213819504,
1608
+ "learning_rate": 1.7239293739967898e-05,
1609
+ "loss": 0.0699,
1610
+ "step": 22000
1611
+ },
1612
+ {
1613
+ "epoch": 0.7072,
1614
+ "grad_norm": 0.5616400241851807,
1615
+ "learning_rate": 1.7226452648475123e-05,
1616
+ "loss": 0.0649,
1617
+ "step": 22100
1618
+ },
1619
+ {
1620
+ "epoch": 0.7104,
1621
+ "grad_norm": 6.950782299041748,
1622
+ "learning_rate": 1.7213611556982345e-05,
1623
+ "loss": 0.0719,
1624
+ "step": 22200
1625
+ },
1626
+ {
1627
+ "epoch": 0.7136,
1628
+ "grad_norm": 0.07157002389431,
1629
+ "learning_rate": 1.720077046548957e-05,
1630
+ "loss": 0.0403,
1631
+ "step": 22300
1632
+ },
1633
+ {
1634
+ "epoch": 0.7168,
1635
+ "grad_norm": 2.784773588180542,
1636
+ "learning_rate": 1.718792937399679e-05,
1637
+ "loss": 0.0468,
1638
+ "step": 22400
1639
+ },
1640
+ {
1641
+ "epoch": 0.72,
1642
+ "grad_norm": 0.088102325797081,
1643
+ "learning_rate": 1.7175088282504012e-05,
1644
+ "loss": 0.0612,
1645
+ "step": 22500
1646
+ },
1647
+ {
1648
+ "epoch": 0.7232,
1649
+ "grad_norm": 0.373806357383728,
1650
+ "learning_rate": 1.7162247191011237e-05,
1651
+ "loss": 0.0607,
1652
+ "step": 22600
1653
+ },
1654
+ {
1655
+ "epoch": 0.7264,
1656
+ "grad_norm": 10.401127815246582,
1657
+ "learning_rate": 1.714940609951846e-05,
1658
+ "loss": 0.0572,
1659
+ "step": 22700
1660
+ },
1661
+ {
1662
+ "epoch": 0.7296,
1663
+ "grad_norm": 0.006438109558075666,
1664
+ "learning_rate": 1.7136565008025683e-05,
1665
+ "loss": 0.0472,
1666
+ "step": 22800
1667
+ },
1668
+ {
1669
+ "epoch": 0.7328,
1670
+ "grad_norm": 0.15433204174041748,
1671
+ "learning_rate": 1.7123723916532905e-05,
1672
+ "loss": 0.0578,
1673
+ "step": 22900
1674
+ },
1675
+ {
1676
+ "epoch": 0.736,
1677
+ "grad_norm": 0.29676365852355957,
1678
+ "learning_rate": 1.711088282504013e-05,
1679
+ "loss": 0.0355,
1680
+ "step": 23000
1681
+ },
1682
+ {
1683
+ "epoch": 0.7392,
1684
+ "grad_norm": 0.007739920634776354,
1685
+ "learning_rate": 1.709804173354735e-05,
1686
+ "loss": 0.0545,
1687
+ "step": 23100
1688
+ },
1689
+ {
1690
+ "epoch": 0.7424,
1691
+ "grad_norm": 16.565767288208008,
1692
+ "learning_rate": 1.7085200642054576e-05,
1693
+ "loss": 0.0662,
1694
+ "step": 23200
1695
+ },
1696
+ {
1697
+ "epoch": 0.7456,
1698
+ "grad_norm": 0.013131607323884964,
1699
+ "learning_rate": 1.7072359550561797e-05,
1700
+ "loss": 0.0734,
1701
+ "step": 23300
1702
+ },
1703
+ {
1704
+ "epoch": 0.7488,
1705
+ "grad_norm": 1.746962308883667,
1706
+ "learning_rate": 1.7059518459069022e-05,
1707
+ "loss": 0.0558,
1708
+ "step": 23400
1709
+ },
1710
+ {
1711
+ "epoch": 0.752,
1712
+ "grad_norm": 6.599545955657959,
1713
+ "learning_rate": 1.7046677367576247e-05,
1714
+ "loss": 0.0485,
1715
+ "step": 23500
1716
+ },
1717
+ {
1718
+ "epoch": 0.7552,
1719
+ "grad_norm": 4.333959102630615,
1720
+ "learning_rate": 1.703383627608347e-05,
1721
+ "loss": 0.0554,
1722
+ "step": 23600
1723
+ },
1724
+ {
1725
+ "epoch": 0.7584,
1726
+ "grad_norm": 0.16271114349365234,
1727
+ "learning_rate": 1.7020995184590693e-05,
1728
+ "loss": 0.0367,
1729
+ "step": 23700
1730
+ },
1731
+ {
1732
+ "epoch": 0.7616,
1733
+ "grad_norm": 17.760648727416992,
1734
+ "learning_rate": 1.7008154093097915e-05,
1735
+ "loss": 0.0871,
1736
+ "step": 23800
1737
+ },
1738
+ {
1739
+ "epoch": 0.7648,
1740
+ "grad_norm": 0.15831367671489716,
1741
+ "learning_rate": 1.699531300160514e-05,
1742
+ "loss": 0.0525,
1743
+ "step": 23900
1744
+ },
1745
+ {
1746
+ "epoch": 0.768,
1747
+ "grad_norm": 10.259693145751953,
1748
+ "learning_rate": 1.698247191011236e-05,
1749
+ "loss": 0.0643,
1750
+ "step": 24000
1751
+ },
1752
+ {
1753
+ "epoch": 0.768,
1754
+ "eval_accuracy": 0.98307,
1755
+ "eval_f1": 0.9830752640408086,
1756
+ "eval_loss": 0.07612209022045135,
1757
+ "eval_precision": 0.9832125198389433,
1758
+ "eval_recall": 0.98307,
1759
+ "eval_runtime": 774.4597,
1760
+ "eval_samples_per_second": 129.122,
1761
+ "eval_steps_per_second": 8.07,
1762
+ "step": 24000
1763
  }
1764
  ],
1765
  "logging_steps": 100,
 
1774
  "early_stopping_threshold": 0.0
1775
  },
1776
  "attributes": {
1777
+ "early_stopping_patience_counter": 1
1778
  }
1779
  },
1780
  "TrainerControl": {
 
1788
  "attributes": {}
1789
  }
1790
  },
1791
+ "total_flos": 1.0191521072952346e+17,
1792
  "train_batch_size": 16,
1793
  "trial_name": null,
1794
  "trial_params": null