azherali commited on
Commit
edd36ec
·
verified ·
1 Parent(s): b2e086c

Training in progress, step 44000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33f0f667a5193eb4d35b243c5c1df790f53411abe8add627f8801dc7a6e453fb
3
  size 3555504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16dadfb9608dcaa99e56c16537431ef4528e7f7edc4ac58dfea4bb46f7e1c8a9
3
  size 3555504
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:338d656c8c08afe1535666116a30ec6c8bc16d2218bd3572b0ad6095b0a6fc86
3
  size 7141515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a2d226450cdcebcf2615a1d39959652fe5438e10e2d6cd2cb8d2468a792f8b2
3
  size 7141515
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f75ef4c1eb025b45e99753eb3086841a7a2849ad71d5bd6afa88ad76ffcffe8a
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97462624a2fc53c8574a0620aac025280c9bdbbb7138ff03f47f37018b457bf4
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00b9e9e803d002d2b860a18a7caf54803f3c31024a6925df1ba4a7df5d623e98
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e9a989616374c693d6e283e9a661c77047898be59d9e06a73f69b65c271f395
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90b69642bdda2390c9b2aaa786137796ae481637fe31199c160a1cb107e6720e
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7880c38c083e20dc3aacb94693eef3b1547dc3e69aff0279d80323326c2ebc49
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 36000,
3
- "best_metric": 0.9893807849919393,
4
- "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-36000",
5
- "epoch": 1.28,
6
  "eval_steps": 4000,
7
- "global_step": 40000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2928,6 +2928,298 @@
2928
  "eval_samples_per_second": 128.613,
2929
  "eval_steps_per_second": 8.038,
2930
  "step": 40000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2931
  }
2932
  ],
2933
  "logging_steps": 100,
@@ -2942,7 +3234,7 @@
2942
  "early_stopping_threshold": 0.0
2943
  },
2944
  "attributes": {
2945
- "early_stopping_patience_counter": 1
2946
  }
2947
  },
2948
  "TrainerControl": {
@@ -2956,7 +3248,7 @@
2956
  "attributes": {}
2957
  }
2958
  },
2959
- "total_flos": 1.698520718775022e+17,
2960
  "train_batch_size": 16,
2961
  "trial_name": null,
2962
  "trial_params": null
 
1
  {
2
+ "best_global_step": 44000,
3
+ "best_metric": 0.9900904784547742,
4
+ "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-44000",
5
+ "epoch": 1.408,
6
  "eval_steps": 4000,
7
+ "global_step": 44000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2928
  "eval_samples_per_second": 128.613,
2929
  "eval_steps_per_second": 8.038,
2930
  "step": 40000
2931
+ },
2932
+ {
2933
+ "epoch": 1.2832,
2934
+ "grad_norm": 0.879464328289032,
2935
+ "learning_rate": 1.4915056179775283e-05,
2936
+ "loss": 0.0371,
2937
+ "step": 40100
2938
+ },
2939
+ {
2940
+ "epoch": 1.2864,
2941
+ "grad_norm": 0.5318993926048279,
2942
+ "learning_rate": 1.4902215088282506e-05,
2943
+ "loss": 0.0638,
2944
+ "step": 40200
2945
+ },
2946
+ {
2947
+ "epoch": 1.2896,
2948
+ "grad_norm": 0.024928994476795197,
2949
+ "learning_rate": 1.4889373996789729e-05,
2950
+ "loss": 0.039,
2951
+ "step": 40300
2952
+ },
2953
+ {
2954
+ "epoch": 1.2928,
2955
+ "grad_norm": 15.540450096130371,
2956
+ "learning_rate": 1.4876532905296952e-05,
2957
+ "loss": 0.0392,
2958
+ "step": 40400
2959
+ },
2960
+ {
2961
+ "epoch": 1.296,
2962
+ "grad_norm": 3.986953020095825,
2963
+ "learning_rate": 1.4863691813804175e-05,
2964
+ "loss": 0.0518,
2965
+ "step": 40500
2966
+ },
2967
+ {
2968
+ "epoch": 1.2992,
2969
+ "grad_norm": 0.03195634484291077,
2970
+ "learning_rate": 1.4850850722311398e-05,
2971
+ "loss": 0.0614,
2972
+ "step": 40600
2973
+ },
2974
+ {
2975
+ "epoch": 1.3024,
2976
+ "grad_norm": 0.004710075911134481,
2977
+ "learning_rate": 1.4838009630818621e-05,
2978
+ "loss": 0.0384,
2979
+ "step": 40700
2980
+ },
2981
+ {
2982
+ "epoch": 1.3056,
2983
+ "grad_norm": 0.08971494436264038,
2984
+ "learning_rate": 1.4825168539325845e-05,
2985
+ "loss": 0.051,
2986
+ "step": 40800
2987
+ },
2988
+ {
2989
+ "epoch": 1.3088,
2990
+ "grad_norm": 0.00958671048283577,
2991
+ "learning_rate": 1.4812327447833068e-05,
2992
+ "loss": 0.042,
2993
+ "step": 40900
2994
+ },
2995
+ {
2996
+ "epoch": 1.312,
2997
+ "grad_norm": 26.961130142211914,
2998
+ "learning_rate": 1.4799486356340289e-05,
2999
+ "loss": 0.0512,
3000
+ "step": 41000
3001
+ },
3002
+ {
3003
+ "epoch": 1.3152,
3004
+ "grad_norm": 0.12558290362358093,
3005
+ "learning_rate": 1.4786645264847512e-05,
3006
+ "loss": 0.0262,
3007
+ "step": 41100
3008
+ },
3009
+ {
3010
+ "epoch": 1.3184,
3011
+ "grad_norm": 0.020398223772644997,
3012
+ "learning_rate": 1.4773804173354735e-05,
3013
+ "loss": 0.0504,
3014
+ "step": 41200
3015
+ },
3016
+ {
3017
+ "epoch": 1.3216,
3018
+ "grad_norm": 0.04079282656311989,
3019
+ "learning_rate": 1.4760963081861959e-05,
3020
+ "loss": 0.0467,
3021
+ "step": 41300
3022
+ },
3023
+ {
3024
+ "epoch": 1.3248,
3025
+ "grad_norm": 0.01801035739481449,
3026
+ "learning_rate": 1.4748121990369182e-05,
3027
+ "loss": 0.031,
3028
+ "step": 41400
3029
+ },
3030
+ {
3031
+ "epoch": 1.328,
3032
+ "grad_norm": 19.165552139282227,
3033
+ "learning_rate": 1.4735280898876405e-05,
3034
+ "loss": 0.0425,
3035
+ "step": 41500
3036
+ },
3037
+ {
3038
+ "epoch": 1.3312,
3039
+ "grad_norm": 0.06247144564986229,
3040
+ "learning_rate": 1.4722439807383628e-05,
3041
+ "loss": 0.0377,
3042
+ "step": 41600
3043
+ },
3044
+ {
3045
+ "epoch": 1.3344,
3046
+ "grad_norm": 0.07584625482559204,
3047
+ "learning_rate": 1.4709598715890851e-05,
3048
+ "loss": 0.0318,
3049
+ "step": 41700
3050
+ },
3051
+ {
3052
+ "epoch": 1.3376000000000001,
3053
+ "grad_norm": 0.659372866153717,
3054
+ "learning_rate": 1.4696757624398074e-05,
3055
+ "loss": 0.0392,
3056
+ "step": 41800
3057
+ },
3058
+ {
3059
+ "epoch": 1.3408,
3060
+ "grad_norm": 0.027756713330745697,
3061
+ "learning_rate": 1.4683916532905297e-05,
3062
+ "loss": 0.0518,
3063
+ "step": 41900
3064
+ },
3065
+ {
3066
+ "epoch": 1.3439999999999999,
3067
+ "grad_norm": 0.006904853507876396,
3068
+ "learning_rate": 1.467107544141252e-05,
3069
+ "loss": 0.0456,
3070
+ "step": 42000
3071
+ },
3072
+ {
3073
+ "epoch": 1.3472,
3074
+ "grad_norm": 0.005585466045886278,
3075
+ "learning_rate": 1.4658234349919744e-05,
3076
+ "loss": 0.046,
3077
+ "step": 42100
3078
+ },
3079
+ {
3080
+ "epoch": 1.3504,
3081
+ "grad_norm": 5.473335266113281,
3082
+ "learning_rate": 1.4645393258426967e-05,
3083
+ "loss": 0.0428,
3084
+ "step": 42200
3085
+ },
3086
+ {
3087
+ "epoch": 1.3536000000000001,
3088
+ "grad_norm": 10.384184837341309,
3089
+ "learning_rate": 1.4632552166934192e-05,
3090
+ "loss": 0.0512,
3091
+ "step": 42300
3092
+ },
3093
+ {
3094
+ "epoch": 1.3568,
3095
+ "grad_norm": 4.152897357940674,
3096
+ "learning_rate": 1.4619711075441415e-05,
3097
+ "loss": 0.0378,
3098
+ "step": 42400
3099
+ },
3100
+ {
3101
+ "epoch": 1.3599999999999999,
3102
+ "grad_norm": 0.06695935130119324,
3103
+ "learning_rate": 1.4606869983948638e-05,
3104
+ "loss": 0.0411,
3105
+ "step": 42500
3106
+ },
3107
+ {
3108
+ "epoch": 1.3632,
3109
+ "grad_norm": 21.025299072265625,
3110
+ "learning_rate": 1.4594028892455861e-05,
3111
+ "loss": 0.0373,
3112
+ "step": 42600
3113
+ },
3114
+ {
3115
+ "epoch": 1.3664,
3116
+ "grad_norm": 13.606021881103516,
3117
+ "learning_rate": 1.4581187800963084e-05,
3118
+ "loss": 0.0454,
3119
+ "step": 42700
3120
+ },
3121
+ {
3122
+ "epoch": 1.3696,
3123
+ "grad_norm": 0.17352361977100372,
3124
+ "learning_rate": 1.4568346709470307e-05,
3125
+ "loss": 0.0513,
3126
+ "step": 42800
3127
+ },
3128
+ {
3129
+ "epoch": 1.3728,
3130
+ "grad_norm": 1.2343215942382812,
3131
+ "learning_rate": 1.455550561797753e-05,
3132
+ "loss": 0.0699,
3133
+ "step": 42900
3134
+ },
3135
+ {
3136
+ "epoch": 1.376,
3137
+ "grad_norm": 0.01240515150129795,
3138
+ "learning_rate": 1.4542664526484754e-05,
3139
+ "loss": 0.0402,
3140
+ "step": 43000
3141
+ },
3142
+ {
3143
+ "epoch": 1.3792,
3144
+ "grad_norm": 5.457210063934326,
3145
+ "learning_rate": 1.4529823434991977e-05,
3146
+ "loss": 0.0519,
3147
+ "step": 43100
3148
+ },
3149
+ {
3150
+ "epoch": 1.3824,
3151
+ "grad_norm": 0.8150522708892822,
3152
+ "learning_rate": 1.45169823434992e-05,
3153
+ "loss": 0.0605,
3154
+ "step": 43200
3155
+ },
3156
+ {
3157
+ "epoch": 1.3856,
3158
+ "grad_norm": 0.046282608062028885,
3159
+ "learning_rate": 1.4504141252006421e-05,
3160
+ "loss": 0.0523,
3161
+ "step": 43300
3162
+ },
3163
+ {
3164
+ "epoch": 1.3888,
3165
+ "grad_norm": 0.009331628680229187,
3166
+ "learning_rate": 1.4491300160513644e-05,
3167
+ "loss": 0.0453,
3168
+ "step": 43400
3169
+ },
3170
+ {
3171
+ "epoch": 1.392,
3172
+ "grad_norm": 0.15661238133907318,
3173
+ "learning_rate": 1.4478459069020868e-05,
3174
+ "loss": 0.0303,
3175
+ "step": 43500
3176
+ },
3177
+ {
3178
+ "epoch": 1.3952,
3179
+ "grad_norm": 5.842204570770264,
3180
+ "learning_rate": 1.446561797752809e-05,
3181
+ "loss": 0.0369,
3182
+ "step": 43600
3183
+ },
3184
+ {
3185
+ "epoch": 1.3984,
3186
+ "grad_norm": 32.753719329833984,
3187
+ "learning_rate": 1.4452776886035314e-05,
3188
+ "loss": 0.0423,
3189
+ "step": 43700
3190
+ },
3191
+ {
3192
+ "epoch": 1.4016,
3193
+ "grad_norm": 0.0857323631644249,
3194
+ "learning_rate": 1.4439935794542537e-05,
3195
+ "loss": 0.0452,
3196
+ "step": 43800
3197
+ },
3198
+ {
3199
+ "epoch": 1.4048,
3200
+ "grad_norm": 0.03770207613706589,
3201
+ "learning_rate": 1.442709470304976e-05,
3202
+ "loss": 0.0455,
3203
+ "step": 43900
3204
+ },
3205
+ {
3206
+ "epoch": 1.408,
3207
+ "grad_norm": 0.10206503421068192,
3208
+ "learning_rate": 1.4414253611556983e-05,
3209
+ "loss": 0.0423,
3210
+ "step": 44000
3211
+ },
3212
+ {
3213
+ "epoch": 1.408,
3214
+ "eval_accuracy": 0.99009,
3215
+ "eval_f1": 0.9900904784547742,
3216
+ "eval_loss": 0.03986356034874916,
3217
+ "eval_precision": 0.9900932283159651,
3218
+ "eval_recall": 0.99009,
3219
+ "eval_runtime": 775.1079,
3220
+ "eval_samples_per_second": 129.014,
3221
+ "eval_steps_per_second": 8.063,
3222
+ "step": 44000
3223
  }
3224
  ],
3225
  "logging_steps": 100,
 
3234
  "early_stopping_threshold": 0.0
3235
  },
3236
  "attributes": {
3237
+ "early_stopping_patience_counter": 0
3238
  }
3239
  },
3240
  "TrainerControl": {
 
3248
  "attributes": {}
3249
  }
3250
  },
3251
+ "total_flos": 1.8683917813152307e+17,
3252
  "train_batch_size": 16,
3253
  "trial_name": null,
3254
  "trial_params": null