FormlessAI committed on
Commit
58f34fa
·
verified ·
1 Parent(s): 814002c

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72c4bc97664d0aa7a098b0c228a22ae32034b0dbf4ab4da13857ed465b5a65d4
3
  size 1172343536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95d60326c2fb5bdc2227ea23053e3756d83dc36ddea29eca5dc260046692f87a
3
  size 1172343536
last-checkpoint/global_step450/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fec4ba427615f9a1ac4682fa6987540c7cc93575c065657c8e241a4fc48b789f
3
+ size 883824229
last-checkpoint/global_step450/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b766c1393f59ceff3753811c1d8399c3c330ea495add89222f44c4e8d0c07e96
3
+ size 883824293
last-checkpoint/global_step450/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a59ea45fd65e36404fddf28a064870b15927977978069b99a6db5b4ea352e11a
3
+ size 883824293
last-checkpoint/global_step450/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:845db1ea96215d11a9fffaeb5e03c37f5e6840004864dd6829827b2e4e4c1662
3
+ size 883824293
last-checkpoint/global_step450/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e664a44aa87d7fd3709530850d512a9b9b569f4be604b4b3cd6efc30a584551
3
+ size 1172522073
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step350
 
1
+ global_step450
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c372628cf0f11a7814089f880131f006d1271095beb5e7152f3c14aa8985e326
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14e0114f828a13cb0ca5e64776036e4737fc28bdf952551895db70f4b43b75fb
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8627b9687b17d3eb42763f7499513d01033a545f5fdc3224442fa88df7b07c1
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a1299b80d6a33e2be3e9ef939d892cd00826cf75bf71605c4233c7da79f587c
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2861b0db544103a2392f7009e235760e91d4f2dcf2605bc9fda62bad0578110
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53a7808ab1ff80dd1fb0aa15bd8b839fe93e027e522f673e2233ef780746be2f
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5661cf15d465afc34868de007aed00c0a576292f6e776fe25a04f040a9501399
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:829ff85ee41653334c647e740ab9be86f7a1e498563365c34f3b1d0c23c443af
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a14f05b7034e739d561e3e8c467aa7c3cfde2b5a09c502a2513b18be6676f89e
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8693d7d5a684e7cf028814f0eb1563a3103edf4073a4bae9acefa6ff4db05daf
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.04868408665060997,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.30401737242128124,
6
  "eval_steps": 50,
7
- "global_step": 350,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1170,6 +1170,338 @@
1170
  "eval_samples_per_second": 45.201,
1171
  "eval_steps_per_second": 2.833,
1172
  "step": 350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1173
  }
1174
  ],
1175
  "logging_steps": 5,
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.04127497971057892,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.39087947882736157,
6
  "eval_steps": 50,
7
+ "global_step": 450,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1170
  "eval_samples_per_second": 45.201,
1171
  "eval_steps_per_second": 2.833,
1172
  "step": 350
1173
+ },
1174
+ {
1175
+ "epoch": 0.30836047774158526,
1176
+ "grad_norm": 2.1560702323913574,
1177
+ "learning_rate": 2.5948633996090076e-05,
1178
+ "logits/chosen": -0.648144543170929,
1179
+ "logits/rejected": -1.063085913658142,
1180
+ "logps/chosen": -104.30000305175781,
1181
+ "logps/rejected": -205.97500610351562,
1182
+ "loss": 0.0555,
1183
+ "rewards/accuracies": 0.979687511920929,
1184
+ "rewards/chosen": -3.25,
1185
+ "rewards/margins": 14.009374618530273,
1186
+ "rewards/rejected": -17.251562118530273,
1187
+ "step": 355
1188
+ },
1189
+ {
1190
+ "epoch": 0.3127035830618892,
1191
+ "grad_norm": 0.7994560599327087,
1192
+ "learning_rate": 2.5946957521029034e-05,
1193
+ "logits/chosen": -0.589550793170929,
1194
+ "logits/rejected": -1.0056641101837158,
1195
+ "logps/chosen": -102.1875,
1196
+ "logps/rejected": -196.85000610351562,
1197
+ "loss": 0.0448,
1198
+ "rewards/accuracies": 0.987500011920929,
1199
+ "rewards/chosen": -2.729687452316284,
1200
+ "rewards/margins": 12.6953125,
1201
+ "rewards/rejected": -15.425000190734863,
1202
+ "step": 360
1203
+ },
1204
+ {
1205
+ "epoch": 0.31704668838219324,
1206
+ "grad_norm": 1.350748896598816,
1207
+ "learning_rate": 2.5945248440469054e-05,
1208
+ "logits/chosen": -0.571044921875,
1209
+ "logits/rejected": -1.0046875476837158,
1210
+ "logps/chosen": -102.25,
1211
+ "logps/rejected": -198.10000610351562,
1212
+ "loss": 0.0445,
1213
+ "rewards/accuracies": 0.981249988079071,
1214
+ "rewards/chosen": -3.49609375,
1215
+ "rewards/margins": 12.8671875,
1216
+ "rewards/rejected": -16.371875762939453,
1217
+ "step": 365
1218
+ },
1219
+ {
1220
+ "epoch": 0.32138979370249726,
1221
+ "grad_norm": 2.794750452041626,
1222
+ "learning_rate": 2.594350675871275e-05,
1223
+ "logits/chosen": -0.563232421875,
1224
+ "logits/rejected": -0.991992175579071,
1225
+ "logps/chosen": -111.98750305175781,
1226
+ "logps/rejected": -219.9499969482422,
1227
+ "loss": 0.0467,
1228
+ "rewards/accuracies": 0.981249988079071,
1229
+ "rewards/chosen": -4.977734565734863,
1230
+ "rewards/margins": 14.850000381469727,
1231
+ "rewards/rejected": -19.828125,
1232
+ "step": 370
1233
+ },
1234
+ {
1235
+ "epoch": 0.3257328990228013,
1236
+ "grad_norm": 0.9325956106185913,
1237
+ "learning_rate": 2.5941732480144794e-05,
1238
+ "logits/chosen": -0.5328613519668579,
1239
+ "logits/rejected": -0.937207043170929,
1240
+ "logps/chosen": -110.8375015258789,
1241
+ "logps/rejected": -211.3000030517578,
1242
+ "loss": 0.0442,
1243
+ "rewards/accuracies": 0.984375,
1244
+ "rewards/chosen": -4.219140529632568,
1245
+ "rewards/margins": 14.128125190734863,
1246
+ "rewards/rejected": -18.346874237060547,
1247
+ "step": 375
1248
+ },
1249
+ {
1250
+ "epoch": 0.3300760043431053,
1251
+ "grad_norm": 1.3923671245574951,
1252
+ "learning_rate": 2.593992560923194e-05,
1253
+ "logits/chosen": -0.5045410394668579,
1254
+ "logits/rejected": -0.908007800579071,
1255
+ "logps/chosen": -109.01249694824219,
1256
+ "logps/rejected": -205.22500610351562,
1257
+ "loss": 0.0491,
1258
+ "rewards/accuracies": 0.979687511920929,
1259
+ "rewards/chosen": -3.807812452316284,
1260
+ "rewards/margins": 13.123437881469727,
1261
+ "rewards/rejected": -16.9296875,
1262
+ "step": 380
1263
+ },
1264
+ {
1265
+ "epoch": 0.3344191096634093,
1266
+ "grad_norm": 0.972490668296814,
1267
+ "learning_rate": 2.5938086150522983e-05,
1268
+ "logits/chosen": -0.547314465045929,
1269
+ "logits/rejected": -0.972460925579071,
1270
+ "logps/chosen": -108.2125015258789,
1271
+ "logps/rejected": -211.27499389648438,
1272
+ "loss": 0.0426,
1273
+ "rewards/accuracies": 0.987500011920929,
1274
+ "rewards/chosen": -3.686328172683716,
1275
+ "rewards/margins": 13.806249618530273,
1276
+ "rewards/rejected": -17.496875762939453,
1277
+ "step": 385
1278
+ },
1279
+ {
1280
+ "epoch": 0.33876221498371334,
1281
+ "grad_norm": 1.590657353401184,
1282
+ "learning_rate": 2.5936214108648753e-05,
1283
+ "logits/chosen": -0.5133301019668579,
1284
+ "logits/rejected": -0.924511730670929,
1285
+ "logps/chosen": -105.9625015258789,
1286
+ "logps/rejected": -202.375,
1287
+ "loss": 0.0422,
1288
+ "rewards/accuracies": 0.984375,
1289
+ "rewards/chosen": -3.657031297683716,
1290
+ "rewards/margins": 12.839062690734863,
1291
+ "rewards/rejected": -16.495311737060547,
1292
+ "step": 390
1293
+ },
1294
+ {
1295
+ "epoch": 0.34310532030401736,
1296
+ "grad_norm": 0.83821702003479,
1297
+ "learning_rate": 2.5934309488322115e-05,
1298
+ "logits/chosen": -0.47832030057907104,
1299
+ "logits/rejected": -0.9600585699081421,
1300
+ "logps/chosen": -103.4375,
1301
+ "logps/rejected": -204.75,
1302
+ "loss": 0.042,
1303
+ "rewards/accuracies": 0.987500011920929,
1304
+ "rewards/chosen": -3.2427735328674316,
1305
+ "rewards/margins": 13.449999809265137,
1306
+ "rewards/rejected": -16.6953125,
1307
+ "step": 395
1308
+ },
1309
+ {
1310
+ "epoch": 0.3474484256243214,
1311
+ "grad_norm": 3.6917247772216797,
1312
+ "learning_rate": 2.5932372294337945e-05,
1313
+ "logits/chosen": -0.5101073980331421,
1314
+ "logits/rejected": -0.9908202886581421,
1315
+ "logps/chosen": -107.4124984741211,
1316
+ "logps/rejected": -211.47500610351562,
1317
+ "loss": 0.0661,
1318
+ "rewards/accuracies": 0.9781249761581421,
1319
+ "rewards/chosen": -3.83984375,
1320
+ "rewards/margins": 13.987500190734863,
1321
+ "rewards/rejected": -17.837499618530273,
1322
+ "step": 400
1323
+ },
1324
+ {
1325
+ "epoch": 0.3474484256243214,
1326
+ "eval_logits/chosen": -0.45194748044013977,
1327
+ "eval_logits/rejected": -0.9559031128883362,
1328
+ "eval_logps/chosen": -107.63176727294922,
1329
+ "eval_logps/rejected": -211.56678771972656,
1330
+ "eval_loss": 0.05772905796766281,
1331
+ "eval_rewards/accuracies": 0.9812725782394409,
1332
+ "eval_rewards/chosen": -3.8446807861328125,
1333
+ "eval_rewards/margins": 14.318818092346191,
1334
+ "eval_rewards/rejected": -18.15907096862793,
1335
+ "eval_runtime": 97.8489,
1336
+ "eval_samples_per_second": 45.172,
1337
+ "eval_steps_per_second": 2.831,
1338
+ "step": 400
1339
+ },
1340
+ {
1341
+ "epoch": 0.3517915309446254,
1342
+ "grad_norm": 2.334516763687134,
1343
+ "learning_rate": 2.5930402531573135e-05,
1344
+ "logits/chosen": -0.42927247285842896,
1345
+ "logits/rejected": -0.9527343511581421,
1346
+ "logps/chosen": -108.92500305175781,
1347
+ "logps/rejected": -213.22500610351562,
1348
+ "loss": 0.0561,
1349
+ "rewards/accuracies": 0.9765625,
1350
+ "rewards/chosen": -3.4814453125,
1351
+ "rewards/margins": 14.425000190734863,
1352
+ "rewards/rejected": -17.8984375,
1353
+ "step": 405
1354
+ },
1355
+ {
1356
+ "epoch": 0.3561346362649294,
1357
+ "grad_norm": 1.1270828247070312,
1358
+ "learning_rate": 2.5928400204986555e-05,
1359
+ "logits/chosen": -0.3331298828125,
1360
+ "logits/rejected": -0.8285156488418579,
1361
+ "logps/chosen": -103.2125015258789,
1362
+ "logps/rejected": -201.72500610351562,
1363
+ "loss": 0.0509,
1364
+ "rewards/accuracies": 0.981249988079071,
1365
+ "rewards/chosen": -2.831835985183716,
1366
+ "rewards/margins": 13.015625,
1367
+ "rewards/rejected": -15.84375,
1368
+ "step": 410
1369
+ },
1370
+ {
1371
+ "epoch": 0.36047774158523344,
1372
+ "grad_norm": 0.4610127806663513,
1373
+ "learning_rate": 2.5926365319619057e-05,
1374
+ "logits/chosen": -0.23121948540210724,
1375
+ "logits/rejected": -0.729052722454071,
1376
+ "logps/chosen": -99.9000015258789,
1377
+ "logps/rejected": -193.75,
1378
+ "loss": 0.036,
1379
+ "rewards/accuracies": 0.987500011920929,
1380
+ "rewards/chosen": -2.1563477516174316,
1381
+ "rewards/margins": 12.543749809265137,
1382
+ "rewards/rejected": -14.701562881469727,
1383
+ "step": 415
1384
+ },
1385
+ {
1386
+ "epoch": 0.36482084690553745,
1387
+ "grad_norm": 0.6297826170921326,
1388
+ "learning_rate": 2.592429788059347e-05,
1389
+ "logits/chosen": -0.2296913117170334,
1390
+ "logits/rejected": -0.687695324420929,
1391
+ "logps/chosen": -99.42500305175781,
1392
+ "logps/rejected": -192.1750030517578,
1393
+ "loss": 0.06,
1394
+ "rewards/accuracies": 0.9781249761581421,
1395
+ "rewards/chosen": -2.623828172683716,
1396
+ "rewards/margins": 12.537500381469727,
1397
+ "rewards/rejected": -15.157812118530273,
1398
+ "step": 420
1399
+ },
1400
+ {
1401
+ "epoch": 0.3691639522258415,
1402
+ "grad_norm": 0.379153311252594,
1403
+ "learning_rate": 2.592219789311456e-05,
1404
+ "logits/chosen": -0.2949768006801605,
1405
+ "logits/rejected": -0.736132800579071,
1406
+ "logps/chosen": -105.38749694824219,
1407
+ "logps/rejected": -202.52499389648438,
1408
+ "loss": 0.0345,
1409
+ "rewards/accuracies": 0.989062488079071,
1410
+ "rewards/chosen": -3.126757860183716,
1411
+ "rewards/margins": 13.285937309265137,
1412
+ "rewards/rejected": -16.415624618530273,
1413
+ "step": 425
1414
+ },
1415
+ {
1416
+ "epoch": 0.3735070575461455,
1417
+ "grad_norm": 1.750226616859436,
1418
+ "learning_rate": 2.592006536246905e-05,
1419
+ "logits/chosen": -0.30212098360061646,
1420
+ "logits/rejected": -0.7582031488418579,
1421
+ "logps/chosen": -104.73750305175781,
1422
+ "logps/rejected": -200.22500610351562,
1423
+ "loss": 0.051,
1424
+ "rewards/accuracies": 0.979687511920929,
1425
+ "rewards/chosen": -3.115234375,
1426
+ "rewards/margins": 13.2265625,
1427
+ "rewards/rejected": -16.332813262939453,
1428
+ "step": 430
1429
+ },
1430
+ {
1431
+ "epoch": 0.3778501628664495,
1432
+ "grad_norm": 1.2224334478378296,
1433
+ "learning_rate": 2.5917900294025585e-05,
1434
+ "logits/chosen": -0.24736633896827698,
1435
+ "logits/rejected": -0.712597668170929,
1436
+ "logps/chosen": -101.94999694824219,
1437
+ "logps/rejected": -192.72500610351562,
1438
+ "loss": 0.0391,
1439
+ "rewards/accuracies": 0.981249988079071,
1440
+ "rewards/chosen": -2.7787108421325684,
1441
+ "rewards/margins": 12.251562118530273,
1442
+ "rewards/rejected": -15.020312309265137,
1443
+ "step": 435
1444
+ },
1445
+ {
1446
+ "epoch": 0.38219326818675353,
1447
+ "grad_norm": 0.9052988886833191,
1448
+ "learning_rate": 2.5915702693234714e-05,
1449
+ "logits/chosen": -0.2435302734375,
1450
+ "logits/rejected": -0.708789050579071,
1451
+ "logps/chosen": -97.75,
1452
+ "logps/rejected": -188.5,
1453
+ "loss": 0.0276,
1454
+ "rewards/accuracies": 0.989062488079071,
1455
+ "rewards/chosen": -2.202831983566284,
1456
+ "rewards/margins": 11.854687690734863,
1457
+ "rewards/rejected": -14.059374809265137,
1458
+ "step": 440
1459
+ },
1460
+ {
1461
+ "epoch": 0.38653637350705755,
1462
+ "grad_norm": 1.0507088899612427,
1463
+ "learning_rate": 2.59134725656289e-05,
1464
+ "logits/chosen": -0.25420379638671875,
1465
+ "logits/rejected": -0.6980956792831421,
1466
+ "logps/chosen": -100.5250015258789,
1467
+ "logps/rejected": -193.5,
1468
+ "loss": 0.0352,
1469
+ "rewards/accuracies": 0.9859374761581421,
1470
+ "rewards/chosen": -2.911425828933716,
1471
+ "rewards/margins": 12.381250381469727,
1472
+ "rewards/rejected": -15.293749809265137,
1473
+ "step": 445
1474
+ },
1475
+ {
1476
+ "epoch": 0.39087947882736157,
1477
+ "grad_norm": 1.0869028568267822,
1478
+ "learning_rate": 2.5911209916822487e-05,
1479
+ "logits/chosen": -0.2699432373046875,
1480
+ "logits/rejected": -0.7374023199081421,
1481
+ "logps/chosen": -104.23750305175781,
1482
+ "logps/rejected": -201.0,
1483
+ "loss": 0.0237,
1484
+ "rewards/accuracies": 0.989062488079071,
1485
+ "rewards/chosen": -3.2496094703674316,
1486
+ "rewards/margins": 12.903124809265137,
1487
+ "rewards/rejected": -16.1484375,
1488
+ "step": 450
1489
+ },
1490
+ {
1491
+ "epoch": 0.39087947882736157,
1492
+ "eval_logits/chosen": -0.2738350033760071,
1493
+ "eval_logits/rejected": -0.7211896181106567,
1494
+ "eval_logps/chosen": -103.11913299560547,
1495
+ "eval_logps/rejected": -197.62094116210938,
1496
+ "eval_loss": 0.04127497971057892,
1497
+ "eval_rewards/accuracies": 0.984882652759552,
1498
+ "eval_rewards/chosen": -3.015328884124756,
1499
+ "eval_rewards/margins": 12.59047794342041,
1500
+ "eval_rewards/rejected": -15.60582160949707,
1501
+ "eval_runtime": 97.5164,
1502
+ "eval_samples_per_second": 45.326,
1503
+ "eval_steps_per_second": 2.841,
1504
+ "step": 450
1505
  }
1506
  ],
1507
  "logging_steps": 5,