Muhammed164 commited on
Commit
fe6fd17
·
verified ·
1 Parent(s): d7e630f

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d24df5eae34ec3fdf38b1f2b6c298e42ad8fb07bed978b6111473d86177209f2
3
  size 204500912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:268aa3d2814a792a1ce12fc0ee5a43e0bc3f4dfbe66bca24ad57492c892f8b91
3
  size 204500912
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:789ad1dcf0738c2b103827b3bf6b47de957c032a3a1cd442af3b12552cfbbd4f
3
  size 104062923
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d14bdbb174576769aa6486b61934c2015edc41a72d409074143c0b546c4f989b
3
  size 104062923
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d72352d4b2a5c34b9f8314745f4274a6ceaf25c7fd28797dfe03562830d2faba
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf3f9c1ea54f8f95e6812b6b4e99596105233cd3e123554db760e4aba93f83e4
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.122299767364573,
6
  "eval_steps": 500,
7
- "global_step": 800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1208,6 +1208,306 @@
1208
  "rewards/margins": 159.31773376464844,
1209
  "rewards/rejected": -160.49208068847656,
1210
  "step": 800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1211
  }
1212
  ],
1213
  "logging_steps": 10,
@@ -1222,7 +1522,7 @@
1222
  "should_evaluate": false,
1223
  "should_log": false,
1224
  "should_save": true,
1225
- "should_training_stop": false
1226
  },
1227
  "attributes": {}
1228
  }
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.6540378863409773,
6
  "eval_steps": 500,
7
+ "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1208
  "rewards/margins": 159.31773376464844,
1209
  "rewards/rejected": -160.49208068847656,
1210
  "step": 800
1211
+ },
1212
+ {
1213
+ "epoch": 2.148886673313393,
1214
+ "grad_norm": 0.0003583618381526321,
1215
+ "learning_rate": 2.122222222222222e-07,
1216
+ "logits/chosen": 7.2483720779418945,
1217
+ "logits/rejected": 7.788289546966553,
1218
+ "logps/chosen": -497.52069091796875,
1219
+ "logps/rejected": -1158.642822265625,
1220
+ "loss": 0.1160581350326538,
1221
+ "rewards/accuracies": 0.96875,
1222
+ "rewards/chosen": 2.365230083465576,
1223
+ "rewards/margins": 146.15255737304688,
1224
+ "rewards/rejected": -143.78732299804688,
1225
+ "step": 810
1226
+ },
1227
+ {
1228
+ "epoch": 2.1754735792622135,
1229
+ "grad_norm": 6.3310980796813965,
1230
+ "learning_rate": 2.011111111111111e-07,
1231
+ "logits/chosen": 6.909984588623047,
1232
+ "logits/rejected": 7.344359397888184,
1233
+ "logps/chosen": -432.39764404296875,
1234
+ "logps/rejected": -1212.4569091796875,
1235
+ "loss": 0.9685474395751953,
1236
+ "rewards/accuracies": 0.9437500238418579,
1237
+ "rewards/chosen": -2.124849319458008,
1238
+ "rewards/margins": 155.05654907226562,
1239
+ "rewards/rejected": -157.18141174316406,
1240
+ "step": 820
1241
+ },
1242
+ {
1243
+ "epoch": 2.2020604852110335,
1244
+ "grad_norm": 5.151050697094206e-09,
1245
+ "learning_rate": 1.8999999999999998e-07,
1246
+ "logits/chosen": 7.006634712219238,
1247
+ "logits/rejected": 7.5766754150390625,
1248
+ "logps/chosen": -431.0802307128906,
1249
+ "logps/rejected": -1220.452392578125,
1250
+ "loss": 1.1500192642211915,
1251
+ "rewards/accuracies": 0.9750000238418579,
1252
+ "rewards/chosen": -2.019794464111328,
1253
+ "rewards/margins": 152.16506958007812,
1254
+ "rewards/rejected": -154.18484497070312,
1255
+ "step": 830
1256
+ },
1257
+ {
1258
+ "epoch": 2.228647391159854,
1259
+ "grad_norm": 1.951496702049138e-18,
1260
+ "learning_rate": 1.7888888888888887e-07,
1261
+ "logits/chosen": 6.816000938415527,
1262
+ "logits/rejected": 7.375506401062012,
1263
+ "logps/chosen": -439.57891845703125,
1264
+ "logps/rejected": -1222.27001953125,
1265
+ "loss": 0.3972776889801025,
1266
+ "rewards/accuracies": 0.987500011920929,
1267
+ "rewards/chosen": 1.9101593494415283,
1268
+ "rewards/margins": 158.9185333251953,
1269
+ "rewards/rejected": -157.0083770751953,
1270
+ "step": 840
1271
+ },
1272
+ {
1273
+ "epoch": 2.255234297108674,
1274
+ "grad_norm": 2.449645117964328e-15,
1275
+ "learning_rate": 1.6777777777777778e-07,
1276
+ "logits/chosen": 7.166296482086182,
1277
+ "logits/rejected": 7.5857744216918945,
1278
+ "logps/chosen": -484.2479553222656,
1279
+ "logps/rejected": -1235.645263671875,
1280
+ "loss": 0.15833470821380616,
1281
+ "rewards/accuracies": 0.9750000238418579,
1282
+ "rewards/chosen": -0.6545869708061218,
1283
+ "rewards/margins": 155.0919952392578,
1284
+ "rewards/rejected": -155.74655151367188,
1285
+ "step": 850
1286
+ },
1287
+ {
1288
+ "epoch": 2.2818212030574943,
1289
+ "grad_norm": 67.49964141845703,
1290
+ "learning_rate": 1.5666666666666667e-07,
1291
+ "logits/chosen": 6.9471001625061035,
1292
+ "logits/rejected": 7.408398628234863,
1293
+ "logps/chosen": -406.9446105957031,
1294
+ "logps/rejected": -1206.536376953125,
1295
+ "loss": 0.3223508358001709,
1296
+ "rewards/accuracies": 0.9437500238418579,
1297
+ "rewards/chosen": 4.314828395843506,
1298
+ "rewards/margins": 160.91775512695312,
1299
+ "rewards/rejected": -156.60293579101562,
1300
+ "step": 860
1301
+ },
1302
+ {
1303
+ "epoch": 2.308408109006314,
1304
+ "grad_norm": 3.4588420021464117e-06,
1305
+ "learning_rate": 1.4555555555555555e-07,
1306
+ "logits/chosen": 6.990222930908203,
1307
+ "logits/rejected": 7.685202598571777,
1308
+ "logps/chosen": -426.66973876953125,
1309
+ "logps/rejected": -1176.889404296875,
1310
+ "loss": 0.8611475944519043,
1311
+ "rewards/accuracies": 0.9624999761581421,
1312
+ "rewards/chosen": 1.3860576152801514,
1313
+ "rewards/margins": 144.15267944335938,
1314
+ "rewards/rejected": -142.76663208007812,
1315
+ "step": 870
1316
+ },
1317
+ {
1318
+ "epoch": 2.3349950149551346,
1319
+ "grad_norm": 4.1328581182331625e-12,
1320
+ "learning_rate": 1.3444444444444444e-07,
1321
+ "logits/chosen": 7.256162166595459,
1322
+ "logits/rejected": 7.685450553894043,
1323
+ "logps/chosen": -462.0904846191406,
1324
+ "logps/rejected": -1166.178466796875,
1325
+ "loss": 0.024902737140655516,
1326
+ "rewards/accuracies": 0.987500011920929,
1327
+ "rewards/chosen": 1.212837815284729,
1328
+ "rewards/margins": 140.08041381835938,
1329
+ "rewards/rejected": -138.8675537109375,
1330
+ "step": 880
1331
+ },
1332
+ {
1333
+ "epoch": 2.361581920903955,
1334
+ "grad_norm": 22.725154876708984,
1335
+ "learning_rate": 1.2333333333333333e-07,
1336
+ "logits/chosen": 7.431256294250488,
1337
+ "logits/rejected": 7.865132808685303,
1338
+ "logps/chosen": -456.8827209472656,
1339
+ "logps/rejected": -1153.871337890625,
1340
+ "loss": 0.13207526206970216,
1341
+ "rewards/accuracies": 0.981249988079071,
1342
+ "rewards/chosen": 1.6622031927108765,
1343
+ "rewards/margins": 136.24082946777344,
1344
+ "rewards/rejected": -134.57862854003906,
1345
+ "step": 890
1346
+ },
1347
+ {
1348
+ "epoch": 2.388168826852775,
1349
+ "grad_norm": 132.83956909179688,
1350
+ "learning_rate": 1.1222222222222221e-07,
1351
+ "logits/chosen": 7.010849952697754,
1352
+ "logits/rejected": 7.441749572753906,
1353
+ "logps/chosen": -502.49371337890625,
1354
+ "logps/rejected": -1215.2733154296875,
1355
+ "loss": 0.5922121524810791,
1356
+ "rewards/accuracies": 0.9750000238418579,
1357
+ "rewards/chosen": -2.6824889183044434,
1358
+ "rewards/margins": 148.62466430664062,
1359
+ "rewards/rejected": -151.30715942382812,
1360
+ "step": 900
1361
+ },
1362
+ {
1363
+ "epoch": 2.4147557328015954,
1364
+ "grad_norm": 0.005245895590633154,
1365
+ "learning_rate": 1.011111111111111e-07,
1366
+ "logits/chosen": 6.980523109436035,
1367
+ "logits/rejected": 7.430232048034668,
1368
+ "logps/chosen": -470.41253662109375,
1369
+ "logps/rejected": -1160.4951171875,
1370
+ "loss": 1.195225143432617,
1371
+ "rewards/accuracies": 0.949999988079071,
1372
+ "rewards/chosen": -3.2118802070617676,
1373
+ "rewards/margins": 144.29278564453125,
1374
+ "rewards/rejected": -147.50466918945312,
1375
+ "step": 910
1376
+ },
1377
+ {
1378
+ "epoch": 2.4413426387504154,
1379
+ "grad_norm": 194.52578735351562,
1380
+ "learning_rate": 9e-08,
1381
+ "logits/chosen": 6.884810447692871,
1382
+ "logits/rejected": 7.503731727600098,
1383
+ "logps/chosen": -440.31976318359375,
1384
+ "logps/rejected": -1206.906494140625,
1385
+ "loss": 0.44526066780090334,
1386
+ "rewards/accuracies": 0.956250011920929,
1387
+ "rewards/chosen": 1.156048059463501,
1388
+ "rewards/margins": 149.27732849121094,
1389
+ "rewards/rejected": -148.1212921142578,
1390
+ "step": 920
1391
+ },
1392
+ {
1393
+ "epoch": 2.4679295446992358,
1394
+ "grad_norm": 1.5737574004387467e-14,
1395
+ "learning_rate": 7.888888888888889e-08,
1396
+ "logits/chosen": 7.322862148284912,
1397
+ "logits/rejected": 7.748003959655762,
1398
+ "logps/chosen": -508.33245849609375,
1399
+ "logps/rejected": -1189.603759765625,
1400
+ "loss": 0.18692436218261718,
1401
+ "rewards/accuracies": 0.9750000238418579,
1402
+ "rewards/chosen": 1.148808479309082,
1403
+ "rewards/margins": 146.20956420898438,
1404
+ "rewards/rejected": -145.06076049804688,
1405
+ "step": 930
1406
+ },
1407
+ {
1408
+ "epoch": 2.4945164506480557,
1409
+ "grad_norm": 0.22959347069263458,
1410
+ "learning_rate": 6.777777777777778e-08,
1411
+ "logits/chosen": 7.375940799713135,
1412
+ "logits/rejected": 7.710402011871338,
1413
+ "logps/chosen": -490.12384033203125,
1414
+ "logps/rejected": -1171.1483154296875,
1415
+ "loss": 0.27915282249450685,
1416
+ "rewards/accuracies": 0.9624999761581421,
1417
+ "rewards/chosen": 1.145845651626587,
1418
+ "rewards/margins": 140.29800415039062,
1419
+ "rewards/rejected": -139.1521453857422,
1420
+ "step": 940
1421
+ },
1422
+ {
1423
+ "epoch": 2.521103356596876,
1424
+ "grad_norm": 22.964818954467773,
1425
+ "learning_rate": 5.666666666666666e-08,
1426
+ "logits/chosen": 7.258917331695557,
1427
+ "logits/rejected": 7.766401767730713,
1428
+ "logps/chosen": -467.205322265625,
1429
+ "logps/rejected": -1157.4315185546875,
1430
+ "loss": 1.6006925582885743,
1431
+ "rewards/accuracies": 0.9437500238418579,
1432
+ "rewards/chosen": 0.41268739104270935,
1433
+ "rewards/margins": 135.27273559570312,
1434
+ "rewards/rejected": -134.86004638671875,
1435
+ "step": 950
1436
+ },
1437
+ {
1438
+ "epoch": 2.547690262545696,
1439
+ "grad_norm": 3.8648969441501535e-11,
1440
+ "learning_rate": 4.555555555555556e-08,
1441
+ "logits/chosen": 7.018073081970215,
1442
+ "logits/rejected": 7.558196067810059,
1443
+ "logps/chosen": -449.532958984375,
1444
+ "logps/rejected": -1138.4356689453125,
1445
+ "loss": 0.28522279262542727,
1446
+ "rewards/accuracies": 0.981249988079071,
1447
+ "rewards/chosen": -0.8609614372253418,
1448
+ "rewards/margins": 139.2249298095703,
1449
+ "rewards/rejected": -140.0858917236328,
1450
+ "step": 960
1451
+ },
1452
+ {
1453
+ "epoch": 2.5742771684945165,
1454
+ "grad_norm": 84.71375274658203,
1455
+ "learning_rate": 3.4444444444444444e-08,
1456
+ "logits/chosen": 7.130776405334473,
1457
+ "logits/rejected": 7.609295845031738,
1458
+ "logps/chosen": -438.42694091796875,
1459
+ "logps/rejected": -1249.6336669921875,
1460
+ "loss": 0.4750792980194092,
1461
+ "rewards/accuracies": 0.96875,
1462
+ "rewards/chosen": -1.273829460144043,
1463
+ "rewards/margins": 160.90731811523438,
1464
+ "rewards/rejected": -162.18113708496094,
1465
+ "step": 970
1466
+ },
1467
+ {
1468
+ "epoch": 2.6008640744433364,
1469
+ "grad_norm": 85.9113540649414,
1470
+ "learning_rate": 2.3333333333333334e-08,
1471
+ "logits/chosen": 7.113263130187988,
1472
+ "logits/rejected": 7.686596870422363,
1473
+ "logps/chosen": -434.325439453125,
1474
+ "logps/rejected": -1194.6849365234375,
1475
+ "loss": 0.33106160163879395,
1476
+ "rewards/accuracies": 0.9437500238418579,
1477
+ "rewards/chosen": -1.2038366794586182,
1478
+ "rewards/margins": 146.78440856933594,
1479
+ "rewards/rejected": -147.98825073242188,
1480
+ "step": 980
1481
+ },
1482
+ {
1483
+ "epoch": 2.627450980392157,
1484
+ "grad_norm": 2.4605165866986043e-20,
1485
+ "learning_rate": 1.2222222222222222e-08,
1486
+ "logits/chosen": 7.00030517578125,
1487
+ "logits/rejected": 7.477368354797363,
1488
+ "logps/chosen": -450.455078125,
1489
+ "logps/rejected": -1269.2520751953125,
1490
+ "loss": 0.2776132583618164,
1491
+ "rewards/accuracies": 0.981249988079071,
1492
+ "rewards/chosen": -3.280397891998291,
1493
+ "rewards/margins": 163.57626342773438,
1494
+ "rewards/rejected": -166.85665893554688,
1495
+ "step": 990
1496
+ },
1497
+ {
1498
+ "epoch": 2.6540378863409773,
1499
+ "grad_norm": 80.78559112548828,
1500
+ "learning_rate": 1.111111111111111e-09,
1501
+ "logits/chosen": 7.087013244628906,
1502
+ "logits/rejected": 7.507058620452881,
1503
+ "logps/chosen": -490.28857421875,
1504
+ "logps/rejected": -1223.248046875,
1505
+ "loss": 0.2815593719482422,
1506
+ "rewards/accuracies": 0.9750000238418579,
1507
+ "rewards/chosen": -2.167093276977539,
1508
+ "rewards/margins": 151.61813354492188,
1509
+ "rewards/rejected": -153.78524780273438,
1510
+ "step": 1000
1511
  }
1512
  ],
1513
  "logging_steps": 10,
 
1522
  "should_evaluate": false,
1523
  "should_log": false,
1524
  "should_save": true,
1525
+ "should_training_stop": true
1526
  },
1527
  "attributes": {}
1528
  }