azherali commited on
Commit
a981687
·
verified ·
1 Parent(s): 1b289d5

Training in progress, step 20000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd57384594eb425dd0bbabba65317a1b5777b6c5b289479078bfbc0a2b10c7cd
3
  size 3555504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bedaf2b834c17d85aceaca7862c24d85d8d872107727528ad8cce968c65457d
3
  size 3555504
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a90fca018f105f0c4de5bd49a9f37f48eb4343bbd82fa5c86a766904ed07780
3
  size 7141515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b774de3046f6c9826032946233d1806ab5688b30b35bf69f6e2c64418b1af07
3
  size 7141515
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60e040819bef00bdfb5631475cda110717053ec4d4c4c67d6781ad7edccde1fd
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2eaf494bdb773a56e55acb93767a950deb04425b06a47268e2a7e6eb9596f87
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9569b0daf1a454e36aca0fe6fc85ec984df4b90957450731328ceedec7505da8
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d613eca09cb3ed2cbc2c00b1d0538a9bda3f76b75cb69bac8b3cf4fa9b1dda90
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8187ef3352672be0a48a06b17757282db7b3ef79dad63ef57d1187dc8f56fd82
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:524e618937b19fc406beb64b3f4e048c69ed7a9b18cd89552940d0d92765916b
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 16000,
3
- "best_metric": 0.9820134202589396,
4
- "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-16000",
5
- "epoch": 0.512,
6
  "eval_steps": 4000,
7
- "global_step": 16000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1176,6 +1176,298 @@
1176
  "eval_samples_per_second": 121.494,
1177
  "eval_steps_per_second": 7.593,
1178
  "step": 16000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1179
  }
1180
  ],
1181
  "logging_steps": 100,
@@ -1204,7 +1496,7 @@
1204
  "attributes": {}
1205
  }
1206
  },
1207
- "total_flos": 6.79459660326359e+16,
1208
  "train_batch_size": 16,
1209
  "trial_name": null,
1210
  "trial_params": null
 
1
  {
2
+ "best_global_step": 20000,
3
+ "best_metric": 0.9846426496660109,
4
+ "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-20000",
5
+ "epoch": 0.64,
6
  "eval_steps": 4000,
7
+ "global_step": 20000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1176
  "eval_samples_per_second": 121.494,
1177
  "eval_steps_per_second": 7.593,
1178
  "step": 16000
1179
+ },
1180
+ {
1181
+ "epoch": 0.5152,
1182
+ "grad_norm": 8.349530220031738,
1183
+ "learning_rate": 1.7996918138041735e-05,
1184
+ "loss": 0.0724,
1185
+ "step": 16100
1186
+ },
1187
+ {
1188
+ "epoch": 0.5184,
1189
+ "grad_norm": 0.05041235312819481,
1190
+ "learning_rate": 1.7984077046548956e-05,
1191
+ "loss": 0.0428,
1192
+ "step": 16200
1193
+ },
1194
+ {
1195
+ "epoch": 0.5216,
1196
+ "grad_norm": 0.3858475685119629,
1197
+ "learning_rate": 1.797123595505618e-05,
1198
+ "loss": 0.0569,
1199
+ "step": 16300
1200
+ },
1201
+ {
1202
+ "epoch": 0.5248,
1203
+ "grad_norm": 8.561657905578613,
1204
+ "learning_rate": 1.7958394863563403e-05,
1205
+ "loss": 0.0758,
1206
+ "step": 16400
1207
+ },
1208
+ {
1209
+ "epoch": 0.528,
1210
+ "grad_norm": 0.025413183495402336,
1211
+ "learning_rate": 1.7945553772070628e-05,
1212
+ "loss": 0.0548,
1213
+ "step": 16500
1214
+ },
1215
+ {
1216
+ "epoch": 0.5312,
1217
+ "grad_norm": 0.010517638176679611,
1218
+ "learning_rate": 1.7932712680577852e-05,
1219
+ "loss": 0.0706,
1220
+ "step": 16600
1221
+ },
1222
+ {
1223
+ "epoch": 0.5344,
1224
+ "grad_norm": 0.08805025368928909,
1225
+ "learning_rate": 1.7919871589085074e-05,
1226
+ "loss": 0.077,
1227
+ "step": 16700
1228
+ },
1229
+ {
1230
+ "epoch": 0.5376,
1231
+ "grad_norm": 5.7931342124938965,
1232
+ "learning_rate": 1.79070304975923e-05,
1233
+ "loss": 0.0669,
1234
+ "step": 16800
1235
+ },
1236
+ {
1237
+ "epoch": 0.5408,
1238
+ "grad_norm": 0.07078564912080765,
1239
+ "learning_rate": 1.789418940609952e-05,
1240
+ "loss": 0.1069,
1241
+ "step": 16900
1242
+ },
1243
+ {
1244
+ "epoch": 0.544,
1245
+ "grad_norm": 1.9886382818222046,
1246
+ "learning_rate": 1.7881348314606745e-05,
1247
+ "loss": 0.0501,
1248
+ "step": 17000
1249
+ },
1250
+ {
1251
+ "epoch": 0.5472,
1252
+ "grad_norm": 6.3300065994262695,
1253
+ "learning_rate": 1.7868507223113966e-05,
1254
+ "loss": 0.0744,
1255
+ "step": 17100
1256
+ },
1257
+ {
1258
+ "epoch": 0.5504,
1259
+ "grad_norm": 0.18114350736141205,
1260
+ "learning_rate": 1.785566613162119e-05,
1261
+ "loss": 0.0782,
1262
+ "step": 17200
1263
+ },
1264
+ {
1265
+ "epoch": 0.5536,
1266
+ "grad_norm": 0.2821557819843292,
1267
+ "learning_rate": 1.7842825040128413e-05,
1268
+ "loss": 0.0477,
1269
+ "step": 17300
1270
+ },
1271
+ {
1272
+ "epoch": 0.5568,
1273
+ "grad_norm": 17.2164306640625,
1274
+ "learning_rate": 1.7829983948635637e-05,
1275
+ "loss": 0.0522,
1276
+ "step": 17400
1277
+ },
1278
+ {
1279
+ "epoch": 0.56,
1280
+ "grad_norm": 11.891914367675781,
1281
+ "learning_rate": 1.781714285714286e-05,
1282
+ "loss": 0.0859,
1283
+ "step": 17500
1284
+ },
1285
+ {
1286
+ "epoch": 0.5632,
1287
+ "grad_norm": 12.457894325256348,
1288
+ "learning_rate": 1.7804301765650084e-05,
1289
+ "loss": 0.0561,
1290
+ "step": 17600
1291
+ },
1292
+ {
1293
+ "epoch": 0.5664,
1294
+ "grad_norm": 1.845371961593628,
1295
+ "learning_rate": 1.7791460674157305e-05,
1296
+ "loss": 0.0598,
1297
+ "step": 17700
1298
+ },
1299
+ {
1300
+ "epoch": 0.5696,
1301
+ "grad_norm": 5.928323268890381,
1302
+ "learning_rate": 1.7778619582664527e-05,
1303
+ "loss": 0.0591,
1304
+ "step": 17800
1305
+ },
1306
+ {
1307
+ "epoch": 0.5728,
1308
+ "grad_norm": 0.10898735374212265,
1309
+ "learning_rate": 1.776577849117175e-05,
1310
+ "loss": 0.068,
1311
+ "step": 17900
1312
+ },
1313
+ {
1314
+ "epoch": 0.576,
1315
+ "grad_norm": 8.659664154052734,
1316
+ "learning_rate": 1.7752937399678973e-05,
1317
+ "loss": 0.0672,
1318
+ "step": 18000
1319
+ },
1320
+ {
1321
+ "epoch": 0.5792,
1322
+ "grad_norm": 0.1051400676369667,
1323
+ "learning_rate": 1.7740096308186198e-05,
1324
+ "loss": 0.0845,
1325
+ "step": 18100
1326
+ },
1327
+ {
1328
+ "epoch": 0.5824,
1329
+ "grad_norm": 2.4647881984710693,
1330
+ "learning_rate": 1.772725521669342e-05,
1331
+ "loss": 0.067,
1332
+ "step": 18200
1333
+ },
1334
+ {
1335
+ "epoch": 0.5856,
1336
+ "grad_norm": 0.04466241970658302,
1337
+ "learning_rate": 1.7714414125200644e-05,
1338
+ "loss": 0.0657,
1339
+ "step": 18300
1340
+ },
1341
+ {
1342
+ "epoch": 0.5888,
1343
+ "grad_norm": 0.014231475070118904,
1344
+ "learning_rate": 1.7701573033707865e-05,
1345
+ "loss": 0.0666,
1346
+ "step": 18400
1347
+ },
1348
+ {
1349
+ "epoch": 0.592,
1350
+ "grad_norm": 10.516510009765625,
1351
+ "learning_rate": 1.768873194221509e-05,
1352
+ "loss": 0.0692,
1353
+ "step": 18500
1354
+ },
1355
+ {
1356
+ "epoch": 0.5952,
1357
+ "grad_norm": 1.2347426414489746,
1358
+ "learning_rate": 1.767589085072231e-05,
1359
+ "loss": 0.057,
1360
+ "step": 18600
1361
+ },
1362
+ {
1363
+ "epoch": 0.5984,
1364
+ "grad_norm": 0.09256485849618912,
1365
+ "learning_rate": 1.7663049759229537e-05,
1366
+ "loss": 0.0726,
1367
+ "step": 18700
1368
+ },
1369
+ {
1370
+ "epoch": 0.6016,
1371
+ "grad_norm": 9.093379020690918,
1372
+ "learning_rate": 1.7650208667736758e-05,
1373
+ "loss": 0.0902,
1374
+ "step": 18800
1375
+ },
1376
+ {
1377
+ "epoch": 0.6048,
1378
+ "grad_norm": 2.5893750190734863,
1379
+ "learning_rate": 1.7637367576243983e-05,
1380
+ "loss": 0.0611,
1381
+ "step": 18900
1382
+ },
1383
+ {
1384
+ "epoch": 0.608,
1385
+ "grad_norm": 0.708980143070221,
1386
+ "learning_rate": 1.7624526484751204e-05,
1387
+ "loss": 0.0726,
1388
+ "step": 19000
1389
+ },
1390
+ {
1391
+ "epoch": 0.6112,
1392
+ "grad_norm": 0.35581639409065247,
1393
+ "learning_rate": 1.761168539325843e-05,
1394
+ "loss": 0.0681,
1395
+ "step": 19100
1396
+ },
1397
+ {
1398
+ "epoch": 0.6144,
1399
+ "grad_norm": 1.3588510751724243,
1400
+ "learning_rate": 1.759884430176565e-05,
1401
+ "loss": 0.0662,
1402
+ "step": 19200
1403
+ },
1404
+ {
1405
+ "epoch": 0.6176,
1406
+ "grad_norm": 9.142585754394531,
1407
+ "learning_rate": 1.7586003210272875e-05,
1408
+ "loss": 0.0559,
1409
+ "step": 19300
1410
+ },
1411
+ {
1412
+ "epoch": 0.6208,
1413
+ "grad_norm": 16.729188919067383,
1414
+ "learning_rate": 1.7573162118780097e-05,
1415
+ "loss": 0.0754,
1416
+ "step": 19400
1417
+ },
1418
+ {
1419
+ "epoch": 0.624,
1420
+ "grad_norm": 11.582767486572266,
1421
+ "learning_rate": 1.756032102728732e-05,
1422
+ "loss": 0.0681,
1423
+ "step": 19500
1424
+ },
1425
+ {
1426
+ "epoch": 0.6272,
1427
+ "grad_norm": 0.046063363552093506,
1428
+ "learning_rate": 1.7547479935794543e-05,
1429
+ "loss": 0.0603,
1430
+ "step": 19600
1431
+ },
1432
+ {
1433
+ "epoch": 0.6304,
1434
+ "grad_norm": 0.294583261013031,
1435
+ "learning_rate": 1.7534638844301768e-05,
1436
+ "loss": 0.0518,
1437
+ "step": 19700
1438
+ },
1439
+ {
1440
+ "epoch": 0.6336,
1441
+ "grad_norm": 0.014278042130172253,
1442
+ "learning_rate": 1.7521797752808993e-05,
1443
+ "loss": 0.0576,
1444
+ "step": 19800
1445
+ },
1446
+ {
1447
+ "epoch": 0.6368,
1448
+ "grad_norm": 0.16866210103034973,
1449
+ "learning_rate": 1.7508956661316214e-05,
1450
+ "loss": 0.0701,
1451
+ "step": 19900
1452
+ },
1453
+ {
1454
+ "epoch": 0.64,
1455
+ "grad_norm": 6.832259654998779,
1456
+ "learning_rate": 1.749611556982344e-05,
1457
+ "loss": 0.0776,
1458
+ "step": 20000
1459
+ },
1460
+ {
1461
+ "epoch": 0.64,
1462
+ "eval_accuracy": 0.98464,
1463
+ "eval_f1": 0.9846426496660109,
1464
+ "eval_loss": 0.061699531972408295,
1465
+ "eval_precision": 0.9846784488090538,
1466
+ "eval_recall": 0.98464,
1467
+ "eval_runtime": 814.0166,
1468
+ "eval_samples_per_second": 122.848,
1469
+ "eval_steps_per_second": 7.678,
1470
+ "step": 20000
1471
  }
1472
  ],
1473
  "logging_steps": 100,
 
1496
  "attributes": {}
1497
  }
1498
  },
1499
+ "total_flos": 8.492350218891494e+16,
1500
  "train_batch_size": 16,
1501
  "trial_name": null,
1502
  "trial_params": null