azherali commited on
Commit
b9d773d
·
verified ·
1 Parent(s): 468f250

Training in progress, step 32000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:975b3db65f8ee4e6188b6e86efc13e7b0764db49eec13babdd30b47f2f1608b9
3
  size 3555504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f5cafd52307ebb0d4ad02660f4fa895f4176ab48ccb18d8698902129b3b1209
3
  size 3555504
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b11ba4eb38c9365e25a19aa1849379c94eb8123f5f95fdedf2db233422df02e9
3
  size 7141515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26d3696bade1cccaee17e0a3be137fdd3e3380bab51d20a657d51f20328ef6ef
3
  size 7141515
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c9c0d47048189414b4e2674d62285eb69167766265a633b317b379e2de5cad7
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:819540c05377c6a98947b46ab098a8f4456dd635badbf1ea38d292430af1a819
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a918540d48e856372c74bc697e7d28a86a7fdf24471d9a4adf2ffeeae4217207
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d960d60d0c4ed45cc4422d14d1920d93529483656b6082492613ed8fd865f5f
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4ab9378d6970094f99c33aa1f077f43da045d25f84f4484908cc97d7399e61f
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25361f5602fa6c5f9d3627497e44e73d38bd7e5fabd543717ca1fb3c15c5a95b
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 28000,
3
- "best_metric": 0.9878109998616666,
4
- "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-28000",
5
- "epoch": 0.896,
6
  "eval_steps": 4000,
7
- "global_step": 28000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2052,6 +2052,298 @@
2052
  "eval_samples_per_second": 128.429,
2053
  "eval_steps_per_second": 8.027,
2054
  "step": 28000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2055
  }
2056
  ],
2057
  "logging_steps": 100,
@@ -2080,7 +2372,7 @@
2080
  "attributes": {}
2081
  }
2082
  },
2083
- "total_flos": 1.1889930140587046e+17,
2084
  "train_batch_size": 16,
2085
  "trial_name": null,
2086
  "trial_params": null
 
1
  {
2
+ "best_global_step": 32000,
3
+ "best_metric": 0.9879419419991998,
4
+ "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-32000",
5
+ "epoch": 1.024,
6
  "eval_steps": 4000,
7
+ "global_step": 32000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2052
  "eval_samples_per_second": 128.429,
2053
  "eval_steps_per_second": 8.027,
2054
  "step": 28000
2055
+ },
2056
+ {
2057
+ "epoch": 0.8992,
2058
+ "grad_norm": 0.008244618773460388,
2059
+ "learning_rate": 1.6455987158908508e-05,
2060
+ "loss": 0.041,
2061
+ "step": 28100
2062
+ },
2063
+ {
2064
+ "epoch": 0.9024,
2065
+ "grad_norm": 0.01580977439880371,
2066
+ "learning_rate": 1.6443146067415733e-05,
2067
+ "loss": 0.057,
2068
+ "step": 28200
2069
+ },
2070
+ {
2071
+ "epoch": 0.9056,
2072
+ "grad_norm": 10.525986671447754,
2073
+ "learning_rate": 1.6430304975922954e-05,
2074
+ "loss": 0.0322,
2075
+ "step": 28300
2076
+ },
2077
+ {
2078
+ "epoch": 0.9088,
2079
+ "grad_norm": 0.016806138679385185,
2080
+ "learning_rate": 1.641746388443018e-05,
2081
+ "loss": 0.0523,
2082
+ "step": 28400
2083
+ },
2084
+ {
2085
+ "epoch": 0.912,
2086
+ "grad_norm": 6.311640739440918,
2087
+ "learning_rate": 1.64046227929374e-05,
2088
+ "loss": 0.0497,
2089
+ "step": 28500
2090
+ },
2091
+ {
2092
+ "epoch": 0.9152,
2093
+ "grad_norm": 0.09537007659673691,
2094
+ "learning_rate": 1.6391781701444625e-05,
2095
+ "loss": 0.0523,
2096
+ "step": 28600
2097
+ },
2098
+ {
2099
+ "epoch": 0.9184,
2100
+ "grad_norm": 0.010998793877661228,
2101
+ "learning_rate": 1.6378940609951847e-05,
2102
+ "loss": 0.0481,
2103
+ "step": 28700
2104
+ },
2105
+ {
2106
+ "epoch": 0.9216,
2107
+ "grad_norm": 0.24172453582286835,
2108
+ "learning_rate": 1.636609951845907e-05,
2109
+ "loss": 0.0537,
2110
+ "step": 28800
2111
+ },
2112
+ {
2113
+ "epoch": 0.9248,
2114
+ "grad_norm": 0.00843421183526516,
2115
+ "learning_rate": 1.6353258426966293e-05,
2116
+ "loss": 0.0345,
2117
+ "step": 28900
2118
+ },
2119
+ {
2120
+ "epoch": 0.928,
2121
+ "grad_norm": 26.664979934692383,
2122
+ "learning_rate": 1.6340417335473518e-05,
2123
+ "loss": 0.0667,
2124
+ "step": 29000
2125
+ },
2126
+ {
2127
+ "epoch": 0.9312,
2128
+ "grad_norm": 0.3694003224372864,
2129
+ "learning_rate": 1.632757624398074e-05,
2130
+ "loss": 0.0555,
2131
+ "step": 29100
2132
+ },
2133
+ {
2134
+ "epoch": 0.9344,
2135
+ "grad_norm": 24.855958938598633,
2136
+ "learning_rate": 1.6314735152487964e-05,
2137
+ "loss": 0.058,
2138
+ "step": 29200
2139
+ },
2140
+ {
2141
+ "epoch": 0.9376,
2142
+ "grad_norm": 2.051378011703491,
2143
+ "learning_rate": 1.6301894060995186e-05,
2144
+ "loss": 0.0427,
2145
+ "step": 29300
2146
+ },
2147
+ {
2148
+ "epoch": 0.9408,
2149
+ "grad_norm": 0.8996158838272095,
2150
+ "learning_rate": 1.6289052969502407e-05,
2151
+ "loss": 0.0462,
2152
+ "step": 29400
2153
+ },
2154
+ {
2155
+ "epoch": 0.944,
2156
+ "grad_norm": 14.316937446594238,
2157
+ "learning_rate": 1.6276211878009632e-05,
2158
+ "loss": 0.0658,
2159
+ "step": 29500
2160
+ },
2161
+ {
2162
+ "epoch": 0.9472,
2163
+ "grad_norm": 11.959718704223633,
2164
+ "learning_rate": 1.6263370786516853e-05,
2165
+ "loss": 0.0396,
2166
+ "step": 29600
2167
+ },
2168
+ {
2169
+ "epoch": 0.9504,
2170
+ "grad_norm": 0.015293744392693043,
2171
+ "learning_rate": 1.6250529695024078e-05,
2172
+ "loss": 0.0489,
2173
+ "step": 29700
2174
+ },
2175
+ {
2176
+ "epoch": 0.9536,
2177
+ "grad_norm": 14.037763595581055,
2178
+ "learning_rate": 1.62376886035313e-05,
2179
+ "loss": 0.0746,
2180
+ "step": 29800
2181
+ },
2182
+ {
2183
+ "epoch": 0.9568,
2184
+ "grad_norm": 0.04080112278461456,
2185
+ "learning_rate": 1.6224847512038524e-05,
2186
+ "loss": 0.0612,
2187
+ "step": 29900
2188
+ },
2189
+ {
2190
+ "epoch": 0.96,
2191
+ "grad_norm": 0.06590835005044937,
2192
+ "learning_rate": 1.6212006420545746e-05,
2193
+ "loss": 0.0597,
2194
+ "step": 30000
2195
+ },
2196
+ {
2197
+ "epoch": 0.9632,
2198
+ "grad_norm": 8.91706657409668,
2199
+ "learning_rate": 1.619916532905297e-05,
2200
+ "loss": 0.0489,
2201
+ "step": 30100
2202
+ },
2203
+ {
2204
+ "epoch": 0.9664,
2205
+ "grad_norm": 0.019029080867767334,
2206
+ "learning_rate": 1.6186324237560192e-05,
2207
+ "loss": 0.0572,
2208
+ "step": 30200
2209
+ },
2210
+ {
2211
+ "epoch": 0.9696,
2212
+ "grad_norm": 4.674193382263184,
2213
+ "learning_rate": 1.6173483146067417e-05,
2214
+ "loss": 0.0587,
2215
+ "step": 30300
2216
+ },
2217
+ {
2218
+ "epoch": 0.9728,
2219
+ "grad_norm": 0.14981134235858917,
2220
+ "learning_rate": 1.6160642054574642e-05,
2221
+ "loss": 0.0323,
2222
+ "step": 30400
2223
+ },
2224
+ {
2225
+ "epoch": 0.976,
2226
+ "grad_norm": 4.462047100067139,
2227
+ "learning_rate": 1.6147800963081863e-05,
2228
+ "loss": 0.053,
2229
+ "step": 30500
2230
+ },
2231
+ {
2232
+ "epoch": 0.9792,
2233
+ "grad_norm": 0.10449015349149704,
2234
+ "learning_rate": 1.6134959871589088e-05,
2235
+ "loss": 0.0465,
2236
+ "step": 30600
2237
+ },
2238
+ {
2239
+ "epoch": 0.9824,
2240
+ "grad_norm": 1.134883999824524,
2241
+ "learning_rate": 1.612211878009631e-05,
2242
+ "loss": 0.0534,
2243
+ "step": 30700
2244
+ },
2245
+ {
2246
+ "epoch": 0.9856,
2247
+ "grad_norm": 21.06899070739746,
2248
+ "learning_rate": 1.6109277688603534e-05,
2249
+ "loss": 0.0515,
2250
+ "step": 30800
2251
+ },
2252
+ {
2253
+ "epoch": 0.9888,
2254
+ "grad_norm": 20.929187774658203,
2255
+ "learning_rate": 1.6096436597110756e-05,
2256
+ "loss": 0.046,
2257
+ "step": 30900
2258
+ },
2259
+ {
2260
+ "epoch": 0.992,
2261
+ "grad_norm": 0.008979029022157192,
2262
+ "learning_rate": 1.608359550561798e-05,
2263
+ "loss": 0.0288,
2264
+ "step": 31000
2265
+ },
2266
+ {
2267
+ "epoch": 0.9952,
2268
+ "grad_norm": 10.832496643066406,
2269
+ "learning_rate": 1.6070754414125202e-05,
2270
+ "loss": 0.0533,
2271
+ "step": 31100
2272
+ },
2273
+ {
2274
+ "epoch": 0.9984,
2275
+ "grad_norm": 10.279513359069824,
2276
+ "learning_rate": 1.6057913322632427e-05,
2277
+ "loss": 0.0535,
2278
+ "step": 31200
2279
+ },
2280
+ {
2281
+ "epoch": 1.0016,
2282
+ "grad_norm": 0.050070084631443024,
2283
+ "learning_rate": 1.6045072231139648e-05,
2284
+ "loss": 0.0488,
2285
+ "step": 31300
2286
+ },
2287
+ {
2288
+ "epoch": 1.0048,
2289
+ "grad_norm": 7.610085964202881,
2290
+ "learning_rate": 1.6032231139646873e-05,
2291
+ "loss": 0.0499,
2292
+ "step": 31400
2293
+ },
2294
+ {
2295
+ "epoch": 1.008,
2296
+ "grad_norm": 0.03847907483577728,
2297
+ "learning_rate": 1.6019390048154095e-05,
2298
+ "loss": 0.0668,
2299
+ "step": 31500
2300
+ },
2301
+ {
2302
+ "epoch": 1.0112,
2303
+ "grad_norm": 0.009822272695600986,
2304
+ "learning_rate": 1.600654895666132e-05,
2305
+ "loss": 0.0403,
2306
+ "step": 31600
2307
+ },
2308
+ {
2309
+ "epoch": 1.0144,
2310
+ "grad_norm": 0.1967863291501999,
2311
+ "learning_rate": 1.599370786516854e-05,
2312
+ "loss": 0.0565,
2313
+ "step": 31700
2314
+ },
2315
+ {
2316
+ "epoch": 1.0176,
2317
+ "grad_norm": 8.522165298461914,
2318
+ "learning_rate": 1.5980866773675762e-05,
2319
+ "loss": 0.0298,
2320
+ "step": 31800
2321
+ },
2322
+ {
2323
+ "epoch": 1.0208,
2324
+ "grad_norm": 0.7742573618888855,
2325
+ "learning_rate": 1.5968025682182987e-05,
2326
+ "loss": 0.0411,
2327
+ "step": 31900
2328
+ },
2329
+ {
2330
+ "epoch": 1.024,
2331
+ "grad_norm": 0.07143627107143402,
2332
+ "learning_rate": 1.595518459069021e-05,
2333
+ "loss": 0.0477,
2334
+ "step": 32000
2335
+ },
2336
+ {
2337
+ "epoch": 1.024,
2338
+ "eval_accuracy": 0.98794,
2339
+ "eval_f1": 0.9879419419991998,
2340
+ "eval_loss": 0.04801899939775467,
2341
+ "eval_precision": 0.9879724568651884,
2342
+ "eval_recall": 0.98794,
2343
+ "eval_runtime": 775.4534,
2344
+ "eval_samples_per_second": 128.957,
2345
+ "eval_steps_per_second": 8.06,
2346
+ "step": 32000
2347
  }
2348
  ],
2349
  "logging_steps": 100,
 
2372
  "attributes": {}
2373
  }
2374
  },
2375
+ "total_flos": 1.3588330900845235e+17,
2376
  "train_batch_size": 16,
2377
  "trial_name": null,
2378
  "trial_params": null