azherali commited on
Commit
afd9948
·
verified ·
1 Parent(s): 2286235

Training in progress, step 40000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4a4b1c16a17272dd3d5d44145b877d0c14dc6e19c78bd41637a76bc87a0e7ea
3
  size 3555504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33f0f667a5193eb4d35b243c5c1df790f53411abe8add627f8801dc7a6e453fb
3
  size 3555504
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68ca690b9a0c619477d3038e9b1f2189091b893c636b800c671b19aeb6d28dba
3
  size 7141515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:338d656c8c08afe1535666116a30ec6c8bc16d2218bd3572b0ad6095b0a6fc86
3
  size 7141515
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d04e325551f27b0abb1677ad51b5250cc6db4cb37a98cc2cc710661788306e4
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f75ef4c1eb025b45e99753eb3086841a7a2849ad71d5bd6afa88ad76ffcffe8a
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eba75c9103403d8e6ccb33e21824c3bb41c5512582b8b3e0415b8a731bc74943
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00b9e9e803d002d2b860a18a7caf54803f3c31024a6925df1ba4a7df5d623e98
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bef14adf96a5be9d64a1df76f457ac5b2fcd08c7a9b48176945077bc67a3f2f
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90b69642bdda2390c9b2aaa786137796ae481637fe31199c160a1cb107e6720e
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 36000,
3
  "best_metric": 0.9893807849919393,
4
  "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-36000",
5
- "epoch": 1.152,
6
  "eval_steps": 4000,
7
- "global_step": 36000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2636,6 +2636,298 @@
2636
  "eval_samples_per_second": 128.405,
2637
  "eval_steps_per_second": 8.025,
2638
  "step": 36000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2639
  }
2640
  ],
2641
  "logging_steps": 100,
@@ -2650,7 +2942,7 @@
2650
  "early_stopping_threshold": 0.0
2651
  },
2652
  "attributes": {
2653
- "early_stopping_patience_counter": 0
2654
  }
2655
  },
2656
  "TrainerControl": {
@@ -2664,7 +2956,7 @@
2664
  "attributes": {}
2665
  }
2666
  },
2667
- "total_flos": 1.528656883652379e+17,
2668
  "train_batch_size": 16,
2669
  "trial_name": null,
2670
  "trial_params": null
 
2
  "best_global_step": 36000,
3
  "best_metric": 0.9893807849919393,
4
  "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-36000",
5
+ "epoch": 1.28,
6
  "eval_steps": 4000,
7
+ "global_step": 40000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2636
  "eval_samples_per_second": 128.405,
2637
  "eval_steps_per_second": 8.025,
2638
  "step": 36000
2639
+ },
2640
+ {
2641
+ "epoch": 1.1552,
2642
+ "grad_norm": 7.563354969024658,
2643
+ "learning_rate": 1.542869983948636e-05,
2644
+ "loss": 0.0446,
2645
+ "step": 36100
2646
+ },
2647
+ {
2648
+ "epoch": 1.1584,
2649
+ "grad_norm": 0.017443129792809486,
2650
+ "learning_rate": 1.541585874799358e-05,
2651
+ "loss": 0.0472,
2652
+ "step": 36200
2653
+ },
2654
+ {
2655
+ "epoch": 1.1616,
2656
+ "grad_norm": 0.007690173573791981,
2657
+ "learning_rate": 1.5403017656500805e-05,
2658
+ "loss": 0.0368,
2659
+ "step": 36300
2660
+ },
2661
+ {
2662
+ "epoch": 1.1648,
2663
+ "grad_norm": 0.024819310754537582,
2664
+ "learning_rate": 1.5390176565008027e-05,
2665
+ "loss": 0.0466,
2666
+ "step": 36400
2667
+ },
2668
+ {
2669
+ "epoch": 1.168,
2670
+ "grad_norm": 1.0305715799331665,
2671
+ "learning_rate": 1.5377335473515248e-05,
2672
+ "loss": 0.0396,
2673
+ "step": 36500
2674
+ },
2675
+ {
2676
+ "epoch": 1.1712,
2677
+ "grad_norm": 13.792978286743164,
2678
+ "learning_rate": 1.5364494382022473e-05,
2679
+ "loss": 0.0382,
2680
+ "step": 36600
2681
+ },
2682
+ {
2683
+ "epoch": 1.1743999999999999,
2684
+ "grad_norm": 4.012645721435547,
2685
+ "learning_rate": 1.5351653290529694e-05,
2686
+ "loss": 0.0519,
2687
+ "step": 36700
2688
+ },
2689
+ {
2690
+ "epoch": 1.1776,
2691
+ "grad_norm": 0.015255521982908249,
2692
+ "learning_rate": 1.533881219903692e-05,
2693
+ "loss": 0.0331,
2694
+ "step": 36800
2695
+ },
2696
+ {
2697
+ "epoch": 1.1808,
2698
+ "grad_norm": 0.03518729284405708,
2699
+ "learning_rate": 1.532597110754414e-05,
2700
+ "loss": 0.0442,
2701
+ "step": 36900
2702
+ },
2703
+ {
2704
+ "epoch": 1.184,
2705
+ "grad_norm": 0.12012261152267456,
2706
+ "learning_rate": 1.5313130016051365e-05,
2707
+ "loss": 0.0392,
2708
+ "step": 37000
2709
+ },
2710
+ {
2711
+ "epoch": 1.1872,
2712
+ "grad_norm": 0.03485884144902229,
2713
+ "learning_rate": 1.5300288924558587e-05,
2714
+ "loss": 0.0214,
2715
+ "step": 37100
2716
+ },
2717
+ {
2718
+ "epoch": 1.1904,
2719
+ "grad_norm": 10.218805313110352,
2720
+ "learning_rate": 1.528744783306581e-05,
2721
+ "loss": 0.0528,
2722
+ "step": 37200
2723
+ },
2724
+ {
2725
+ "epoch": 1.1936,
2726
+ "grad_norm": 0.0384359173476696,
2727
+ "learning_rate": 1.5274606741573036e-05,
2728
+ "loss": 0.0625,
2729
+ "step": 37300
2730
+ },
2731
+ {
2732
+ "epoch": 1.1968,
2733
+ "grad_norm": 0.3779418170452118,
2734
+ "learning_rate": 1.5261765650080258e-05,
2735
+ "loss": 0.0328,
2736
+ "step": 37400
2737
+ },
2738
+ {
2739
+ "epoch": 1.2,
2740
+ "grad_norm": 5.618625640869141,
2741
+ "learning_rate": 1.5248924558587481e-05,
2742
+ "loss": 0.065,
2743
+ "step": 37500
2744
+ },
2745
+ {
2746
+ "epoch": 1.2032,
2747
+ "grad_norm": 2.0705819129943848,
2748
+ "learning_rate": 1.5236083467094704e-05,
2749
+ "loss": 0.0594,
2750
+ "step": 37600
2751
+ },
2752
+ {
2753
+ "epoch": 1.2064,
2754
+ "grad_norm": 0.14193743467330933,
2755
+ "learning_rate": 1.5223242375601927e-05,
2756
+ "loss": 0.0417,
2757
+ "step": 37700
2758
+ },
2759
+ {
2760
+ "epoch": 1.2096,
2761
+ "grad_norm": 0.034703925251960754,
2762
+ "learning_rate": 1.521040128410915e-05,
2763
+ "loss": 0.0389,
2764
+ "step": 37800
2765
+ },
2766
+ {
2767
+ "epoch": 1.2128,
2768
+ "grad_norm": 0.011800256557762623,
2769
+ "learning_rate": 1.5197560192616374e-05,
2770
+ "loss": 0.0393,
2771
+ "step": 37900
2772
+ },
2773
+ {
2774
+ "epoch": 1.216,
2775
+ "grad_norm": 0.28774189949035645,
2776
+ "learning_rate": 1.5184719101123597e-05,
2777
+ "loss": 0.0504,
2778
+ "step": 38000
2779
+ },
2780
+ {
2781
+ "epoch": 1.2192,
2782
+ "grad_norm": 0.035763729363679886,
2783
+ "learning_rate": 1.517187800963082e-05,
2784
+ "loss": 0.0534,
2785
+ "step": 38100
2786
+ },
2787
+ {
2788
+ "epoch": 1.2224,
2789
+ "grad_norm": 0.158742755651474,
2790
+ "learning_rate": 1.5159036918138043e-05,
2791
+ "loss": 0.0436,
2792
+ "step": 38200
2793
+ },
2794
+ {
2795
+ "epoch": 1.2256,
2796
+ "grad_norm": 9.794978141784668,
2797
+ "learning_rate": 1.5146195826645266e-05,
2798
+ "loss": 0.0502,
2799
+ "step": 38300
2800
+ },
2801
+ {
2802
+ "epoch": 1.2288000000000001,
2803
+ "grad_norm": 0.021368766203522682,
2804
+ "learning_rate": 1.513335473515249e-05,
2805
+ "loss": 0.0286,
2806
+ "step": 38400
2807
+ },
2808
+ {
2809
+ "epoch": 1.232,
2810
+ "grad_norm": 0.5888408422470093,
2811
+ "learning_rate": 1.5120513643659714e-05,
2812
+ "loss": 0.0674,
2813
+ "step": 38500
2814
+ },
2815
+ {
2816
+ "epoch": 1.2352,
2817
+ "grad_norm": 0.005416017957031727,
2818
+ "learning_rate": 1.5107672552166937e-05,
2819
+ "loss": 0.0381,
2820
+ "step": 38600
2821
+ },
2822
+ {
2823
+ "epoch": 1.2384,
2824
+ "grad_norm": 0.03922798112034798,
2825
+ "learning_rate": 1.5094831460674157e-05,
2826
+ "loss": 0.0747,
2827
+ "step": 38700
2828
+ },
2829
+ {
2830
+ "epoch": 1.2416,
2831
+ "grad_norm": 0.030901480466127396,
2832
+ "learning_rate": 1.508199036918138e-05,
2833
+ "loss": 0.0491,
2834
+ "step": 38800
2835
+ },
2836
+ {
2837
+ "epoch": 1.2448,
2838
+ "grad_norm": 0.02417912147939205,
2839
+ "learning_rate": 1.5069149277688603e-05,
2840
+ "loss": 0.0465,
2841
+ "step": 38900
2842
+ },
2843
+ {
2844
+ "epoch": 1.248,
2845
+ "grad_norm": 15.668951988220215,
2846
+ "learning_rate": 1.5056308186195826e-05,
2847
+ "loss": 0.0535,
2848
+ "step": 39000
2849
+ },
2850
+ {
2851
+ "epoch": 1.2511999999999999,
2852
+ "grad_norm": 0.21102702617645264,
2853
+ "learning_rate": 1.504346709470305e-05,
2854
+ "loss": 0.0517,
2855
+ "step": 39100
2856
+ },
2857
+ {
2858
+ "epoch": 1.2544,
2859
+ "grad_norm": 0.06641241163015366,
2860
+ "learning_rate": 1.5030626003210274e-05,
2861
+ "loss": 0.0416,
2862
+ "step": 39200
2863
+ },
2864
+ {
2865
+ "epoch": 1.2576,
2866
+ "grad_norm": 0.5495890974998474,
2867
+ "learning_rate": 1.5017784911717497e-05,
2868
+ "loss": 0.0357,
2869
+ "step": 39300
2870
+ },
2871
+ {
2872
+ "epoch": 1.2608,
2873
+ "grad_norm": 0.035381533205509186,
2874
+ "learning_rate": 1.500494382022472e-05,
2875
+ "loss": 0.0577,
2876
+ "step": 39400
2877
+ },
2878
+ {
2879
+ "epoch": 1.264,
2880
+ "grad_norm": 0.03879441320896149,
2881
+ "learning_rate": 1.4992102728731944e-05,
2882
+ "loss": 0.0191,
2883
+ "step": 39500
2884
+ },
2885
+ {
2886
+ "epoch": 1.2671999999999999,
2887
+ "grad_norm": 0.014720222912728786,
2888
+ "learning_rate": 1.4979261637239167e-05,
2889
+ "loss": 0.0423,
2890
+ "step": 39600
2891
+ },
2892
+ {
2893
+ "epoch": 1.2704,
2894
+ "grad_norm": 3.2292592525482178,
2895
+ "learning_rate": 1.496642054574639e-05,
2896
+ "loss": 0.0602,
2897
+ "step": 39700
2898
+ },
2899
+ {
2900
+ "epoch": 1.2736,
2901
+ "grad_norm": 1.6030577421188354,
2902
+ "learning_rate": 1.4953579454253613e-05,
2903
+ "loss": 0.0543,
2904
+ "step": 39800
2905
+ },
2906
+ {
2907
+ "epoch": 1.2768,
2908
+ "grad_norm": 0.031688716262578964,
2909
+ "learning_rate": 1.4940738362760836e-05,
2910
+ "loss": 0.0341,
2911
+ "step": 39900
2912
+ },
2913
+ {
2914
+ "epoch": 1.28,
2915
+ "grad_norm": 9.190576553344727,
2916
+ "learning_rate": 1.492789727126806e-05,
2917
+ "loss": 0.0381,
2918
+ "step": 40000
2919
+ },
2920
+ {
2921
+ "epoch": 1.28,
2922
+ "eval_accuracy": 0.98796,
2923
+ "eval_f1": 0.9879625821520611,
2924
+ "eval_loss": 0.04839452728629112,
2925
+ "eval_precision": 0.9880194851769686,
2926
+ "eval_recall": 0.98796,
2927
+ "eval_runtime": 777.527,
2928
+ "eval_samples_per_second": 128.613,
2929
+ "eval_steps_per_second": 8.038,
2930
+ "step": 40000
2931
  }
2932
  ],
2933
  "logging_steps": 100,
 
2942
  "early_stopping_threshold": 0.0
2943
  },
2944
  "attributes": {
2945
+ "early_stopping_patience_counter": 1
2946
  }
2947
  },
2948
  "TrainerControl": {
 
2956
  "attributes": {}
2957
  }
2958
  },
2959
+ "total_flos": 1.698520718775022e+17,
2960
  "train_batch_size": 16,
2961
  "trial_name": null,
2962
  "trial_params": null