azherali commited on
Commit
8bb02fa
·
verified ·
1 Parent(s): 05730b9

Training in progress, step 12000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74cf435fb7349e8e54506ebb58af01f10261a616a326d059cbcdb64543d5c394
3
  size 3555504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22bd38c8f37046bd62018eb7378b2b711b978047511760c0db69629ca9b301ad
3
  size 3555504
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43cecfb928f24cc675ff75e697b1a84f4a59512dbd7a99046461ca8ee5703f8c
3
  size 7141515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:118eb527e3f3d95f6bbf2dc9c4c0579763f722fb5753842653cc84555c706dc2
3
  size 7141515
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f4a3b1d656476cd8e63479b6c20bd7983f231b968166731d8e9104d57790327
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ae035518883f7d805914f164f0380793c8a3934e907b1f7143d4918513187a8
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fbc30c3a1fa2f1322da3a27761ccd07ada09d565f84aed41120ff0a6bec6f17
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a1d6e49e1244c2136fab7f3074345ca5593ed632d4d0d6d129c6fe8e480d66c
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a70ea2ab6fa7c8143ca6087fc4038185a17c76d37cc14c472f1fd0dba3cd729f
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:645e796050c981f4589900a1facddd99613e38baf853710ef8a2a7fa1c39977d
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 8000,
3
- "best_metric": 0.9760920422665103,
4
- "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-8000",
5
- "epoch": 0.256,
6
  "eval_steps": 4000,
7
- "global_step": 8000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -592,6 +592,298 @@
592
  "eval_samples_per_second": 122.308,
593
  "eval_steps_per_second": 7.644,
594
  "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
595
  }
596
  ],
597
  "logging_steps": 100,
@@ -620,7 +912,7 @@
620
  "attributes": {}
621
  }
622
  },
623
- "total_flos": 3.397795082747213e+16,
624
  "train_batch_size": 16,
625
  "trial_name": null,
626
  "trial_params": null
 
1
  {
2
+ "best_global_step": 12000,
3
+ "best_metric": 0.97856556986665,
4
+ "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-12000",
5
+ "epoch": 0.384,
6
  "eval_steps": 4000,
7
+ "global_step": 12000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
592
  "eval_samples_per_second": 122.308,
593
  "eval_steps_per_second": 7.644,
594
  "step": 8000
595
+ },
596
+ {
597
+ "epoch": 0.2592,
598
+ "grad_norm": 6.723151206970215,
599
+ "learning_rate": 1.9024205457463887e-05,
600
+ "loss": 0.0799,
601
+ "step": 8100
602
+ },
603
+ {
604
+ "epoch": 0.2624,
605
+ "grad_norm": 1.805972933769226,
606
+ "learning_rate": 1.901136436597111e-05,
607
+ "loss": 0.1211,
608
+ "step": 8200
609
+ },
610
+ {
611
+ "epoch": 0.2656,
612
+ "grad_norm": 11.118502616882324,
613
+ "learning_rate": 1.8998523274478334e-05,
614
+ "loss": 0.0916,
615
+ "step": 8300
616
+ },
617
+ {
618
+ "epoch": 0.2688,
619
+ "grad_norm": 2.3953633308410645,
620
+ "learning_rate": 1.8985682182985555e-05,
621
+ "loss": 0.0878,
622
+ "step": 8400
623
+ },
624
+ {
625
+ "epoch": 0.272,
626
+ "grad_norm": 0.10008874535560608,
627
+ "learning_rate": 1.8972841091492777e-05,
628
+ "loss": 0.0868,
629
+ "step": 8500
630
+ },
631
+ {
632
+ "epoch": 0.2752,
633
+ "grad_norm": 0.06934285163879395,
634
+ "learning_rate": 1.896e-05,
635
+ "loss": 0.1155,
636
+ "step": 8600
637
+ },
638
+ {
639
+ "epoch": 0.2784,
640
+ "grad_norm": 0.3781879246234894,
641
+ "learning_rate": 1.8947158908507223e-05,
642
+ "loss": 0.0988,
643
+ "step": 8700
644
+ },
645
+ {
646
+ "epoch": 0.2816,
647
+ "grad_norm": 9.6624174118042,
648
+ "learning_rate": 1.8934317817014448e-05,
649
+ "loss": 0.1072,
650
+ "step": 8800
651
+ },
652
+ {
653
+ "epoch": 0.2848,
654
+ "grad_norm": 0.09044591337442398,
655
+ "learning_rate": 1.892147672552167e-05,
656
+ "loss": 0.1085,
657
+ "step": 8900
658
+ },
659
+ {
660
+ "epoch": 0.288,
661
+ "grad_norm": 0.0671633929014206,
662
+ "learning_rate": 1.8908635634028894e-05,
663
+ "loss": 0.1081,
664
+ "step": 9000
665
+ },
666
+ {
667
+ "epoch": 0.2912,
668
+ "grad_norm": 8.130873680114746,
669
+ "learning_rate": 1.8895794542536115e-05,
670
+ "loss": 0.0968,
671
+ "step": 9100
672
+ },
673
+ {
674
+ "epoch": 0.2944,
675
+ "grad_norm": 1.2537904977798462,
676
+ "learning_rate": 1.888295345104334e-05,
677
+ "loss": 0.0877,
678
+ "step": 9200
679
+ },
680
+ {
681
+ "epoch": 0.2976,
682
+ "grad_norm": 6.084417819976807,
683
+ "learning_rate": 1.887011235955056e-05,
684
+ "loss": 0.1022,
685
+ "step": 9300
686
+ },
687
+ {
688
+ "epoch": 0.3008,
689
+ "grad_norm": 6.140512943267822,
690
+ "learning_rate": 1.8857271268057787e-05,
691
+ "loss": 0.1016,
692
+ "step": 9400
693
+ },
694
+ {
695
+ "epoch": 0.304,
696
+ "grad_norm": 1.7347182035446167,
697
+ "learning_rate": 1.8844430176565008e-05,
698
+ "loss": 0.092,
699
+ "step": 9500
700
+ },
701
+ {
702
+ "epoch": 0.3072,
703
+ "grad_norm": 0.6796423196792603,
704
+ "learning_rate": 1.8831589085072233e-05,
705
+ "loss": 0.1172,
706
+ "step": 9600
707
+ },
708
+ {
709
+ "epoch": 0.3104,
710
+ "grad_norm": 10.664779663085938,
711
+ "learning_rate": 1.8818747993579454e-05,
712
+ "loss": 0.0808,
713
+ "step": 9700
714
+ },
715
+ {
716
+ "epoch": 0.3136,
717
+ "grad_norm": 0.4076235294342041,
718
+ "learning_rate": 1.880590690208668e-05,
719
+ "loss": 0.0836,
720
+ "step": 9800
721
+ },
722
+ {
723
+ "epoch": 0.3168,
724
+ "grad_norm": 9.418440818786621,
725
+ "learning_rate": 1.8793065810593904e-05,
726
+ "loss": 0.0675,
727
+ "step": 9900
728
+ },
729
+ {
730
+ "epoch": 0.32,
731
+ "grad_norm": 9.2078857421875,
732
+ "learning_rate": 1.8780224719101125e-05,
733
+ "loss": 0.0686,
734
+ "step": 10000
735
+ },
736
+ {
737
+ "epoch": 0.3232,
738
+ "grad_norm": 0.47173646092414856,
739
+ "learning_rate": 1.876738362760835e-05,
740
+ "loss": 0.1096,
741
+ "step": 10100
742
+ },
743
+ {
744
+ "epoch": 0.3264,
745
+ "grad_norm": 4.5297322273254395,
746
+ "learning_rate": 1.875454253611557e-05,
747
+ "loss": 0.098,
748
+ "step": 10200
749
+ },
750
+ {
751
+ "epoch": 0.3296,
752
+ "grad_norm": 5.099269866943359,
753
+ "learning_rate": 1.8741701444622796e-05,
754
+ "loss": 0.1063,
755
+ "step": 10300
756
+ },
757
+ {
758
+ "epoch": 0.3328,
759
+ "grad_norm": 2.588848114013672,
760
+ "learning_rate": 1.8728860353130018e-05,
761
+ "loss": 0.0989,
762
+ "step": 10400
763
+ },
764
+ {
765
+ "epoch": 0.336,
766
+ "grad_norm": 4.629786968231201,
767
+ "learning_rate": 1.8716019261637243e-05,
768
+ "loss": 0.1018,
769
+ "step": 10500
770
+ },
771
+ {
772
+ "epoch": 0.3392,
773
+ "grad_norm": 11.187308311462402,
774
+ "learning_rate": 1.8703178170144464e-05,
775
+ "loss": 0.0864,
776
+ "step": 10600
777
+ },
778
+ {
779
+ "epoch": 0.3424,
780
+ "grad_norm": 2.476482391357422,
781
+ "learning_rate": 1.869033707865169e-05,
782
+ "loss": 0.0744,
783
+ "step": 10700
784
+ },
785
+ {
786
+ "epoch": 0.3456,
787
+ "grad_norm": 17.418149948120117,
788
+ "learning_rate": 1.867749598715891e-05,
789
+ "loss": 0.1189,
790
+ "step": 10800
791
+ },
792
+ {
793
+ "epoch": 0.3488,
794
+ "grad_norm": 11.753310203552246,
795
+ "learning_rate": 1.8664654895666132e-05,
796
+ "loss": 0.0832,
797
+ "step": 10900
798
+ },
799
+ {
800
+ "epoch": 0.352,
801
+ "grad_norm": 0.41917338967323303,
802
+ "learning_rate": 1.8651813804173357e-05,
803
+ "loss": 0.1063,
804
+ "step": 11000
805
+ },
806
+ {
807
+ "epoch": 0.3552,
808
+ "grad_norm": 14.072111129760742,
809
+ "learning_rate": 1.8638972712680578e-05,
810
+ "loss": 0.1061,
811
+ "step": 11100
812
+ },
813
+ {
814
+ "epoch": 0.3584,
815
+ "grad_norm": 2.6141397953033447,
816
+ "learning_rate": 1.8626131621187803e-05,
817
+ "loss": 0.0934,
818
+ "step": 11200
819
+ },
820
+ {
821
+ "epoch": 0.3616,
822
+ "grad_norm": 3.1363914012908936,
823
+ "learning_rate": 1.8613290529695024e-05,
824
+ "loss": 0.0879,
825
+ "step": 11300
826
+ },
827
+ {
828
+ "epoch": 0.3648,
829
+ "grad_norm": 4.260811805725098,
830
+ "learning_rate": 1.860044943820225e-05,
831
+ "loss": 0.092,
832
+ "step": 11400
833
+ },
834
+ {
835
+ "epoch": 0.368,
836
+ "grad_norm": 0.2677570879459381,
837
+ "learning_rate": 1.858760834670947e-05,
838
+ "loss": 0.1202,
839
+ "step": 11500
840
+ },
841
+ {
842
+ "epoch": 0.3712,
843
+ "grad_norm": 0.056061357259750366,
844
+ "learning_rate": 1.8574767255216696e-05,
845
+ "loss": 0.0773,
846
+ "step": 11600
847
+ },
848
+ {
849
+ "epoch": 0.3744,
850
+ "grad_norm": 7.95279598236084,
851
+ "learning_rate": 1.8561926163723917e-05,
852
+ "loss": 0.0749,
853
+ "step": 11700
854
+ },
855
+ {
856
+ "epoch": 0.3776,
857
+ "grad_norm": 7.4200873374938965,
858
+ "learning_rate": 1.8549085072231142e-05,
859
+ "loss": 0.1054,
860
+ "step": 11800
861
+ },
862
+ {
863
+ "epoch": 0.3808,
864
+ "grad_norm": 0.8819625973701477,
865
+ "learning_rate": 1.8536243980738363e-05,
866
+ "loss": 0.0998,
867
+ "step": 11900
868
+ },
869
+ {
870
+ "epoch": 0.384,
871
+ "grad_norm": 6.32806396484375,
872
+ "learning_rate": 1.8523402889245588e-05,
873
+ "loss": 0.0667,
874
+ "step": 12000
875
+ },
876
+ {
877
+ "epoch": 0.384,
878
+ "eval_accuracy": 0.97856,
879
+ "eval_f1": 0.97856556986665,
880
+ "eval_loss": 0.1008467897772789,
881
+ "eval_precision": 0.9786554480535211,
882
+ "eval_recall": 0.97856,
883
+ "eval_runtime": 822.182,
884
+ "eval_samples_per_second": 121.628,
885
+ "eval_steps_per_second": 7.602,
886
+ "step": 12000
887
  }
888
  ],
889
  "logging_steps": 100,
 
912
  "attributes": {}
913
  }
914
  },
915
+ "total_flos": 5.096626165108723e+16,
916
  "train_batch_size": 16,
917
  "trial_name": null,
918
  "trial_params": null