Instructions to use Muhammed164/SDPO with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Muhammed164/SDPO with Transformers:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("Muhammed164/SDPO", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- Unsloth Studio
How to use Muhammed164/SDPO with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for Muhammed164/SDPO to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for Muhammed164/SDPO to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for Muhammed164/SDPO to start chatting
Load model with FastModel
pip install unsloth
from unsloth import FastModel
model, tokenizer = FastModel.from_pretrained(
    model_name="Muhammed164/SDPO",
    max_seq_length=2048,
)
Training in progress, step 1000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 204500912
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:268aa3d2814a792a1ce12fc0ee5a43e0bc3f4dfbe66bca24ad57492c892f8b91
|
| 3 |
size 204500912
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 104062923
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d14bdbb174576769aa6486b61934c2015edc41a72d409074143c0b546c4f989b
|
| 3 |
size 104062923
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf3f9c1ea54f8f95e6812b6b4e99596105233cd3e123554db760e4aba93f83e4
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1208,6 +1208,306 @@
|
|
| 1208 |
"rewards/margins": 159.31773376464844,
|
| 1209 |
"rewards/rejected": -160.49208068847656,
|
| 1210 |
"step": 800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1211 |
}
|
| 1212 |
],
|
| 1213 |
"logging_steps": 10,
|
|
@@ -1222,7 +1522,7 @@
|
|
| 1222 |
"should_evaluate": false,
|
| 1223 |
"should_log": false,
|
| 1224 |
"should_save": true,
|
| 1225 |
-
"should_training_stop":
|
| 1226 |
},
|
| 1227 |
"attributes": {}
|
| 1228 |
}
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.6540378863409773,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 1000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1208 |
"rewards/margins": 159.31773376464844,
|
| 1209 |
"rewards/rejected": -160.49208068847656,
|
| 1210 |
"step": 800
|
| 1211 |
+
},
|
| 1212 |
+
{
|
| 1213 |
+
"epoch": 2.148886673313393,
|
| 1214 |
+
"grad_norm": 0.0003583618381526321,
|
| 1215 |
+
"learning_rate": 2.122222222222222e-07,
|
| 1216 |
+
"logits/chosen": 7.2483720779418945,
|
| 1217 |
+
"logits/rejected": 7.788289546966553,
|
| 1218 |
+
"logps/chosen": -497.52069091796875,
|
| 1219 |
+
"logps/rejected": -1158.642822265625,
|
| 1220 |
+
"loss": 0.1160581350326538,
|
| 1221 |
+
"rewards/accuracies": 0.96875,
|
| 1222 |
+
"rewards/chosen": 2.365230083465576,
|
| 1223 |
+
"rewards/margins": 146.15255737304688,
|
| 1224 |
+
"rewards/rejected": -143.78732299804688,
|
| 1225 |
+
"step": 810
|
| 1226 |
+
},
|
| 1227 |
+
{
|
| 1228 |
+
"epoch": 2.1754735792622135,
|
| 1229 |
+
"grad_norm": 6.3310980796813965,
|
| 1230 |
+
"learning_rate": 2.011111111111111e-07,
|
| 1231 |
+
"logits/chosen": 6.909984588623047,
|
| 1232 |
+
"logits/rejected": 7.344359397888184,
|
| 1233 |
+
"logps/chosen": -432.39764404296875,
|
| 1234 |
+
"logps/rejected": -1212.4569091796875,
|
| 1235 |
+
"loss": 0.9685474395751953,
|
| 1236 |
+
"rewards/accuracies": 0.9437500238418579,
|
| 1237 |
+
"rewards/chosen": -2.124849319458008,
|
| 1238 |
+
"rewards/margins": 155.05654907226562,
|
| 1239 |
+
"rewards/rejected": -157.18141174316406,
|
| 1240 |
+
"step": 820
|
| 1241 |
+
},
|
| 1242 |
+
{
|
| 1243 |
+
"epoch": 2.2020604852110335,
|
| 1244 |
+
"grad_norm": 5.151050697094206e-09,
|
| 1245 |
+
"learning_rate": 1.8999999999999998e-07,
|
| 1246 |
+
"logits/chosen": 7.006634712219238,
|
| 1247 |
+
"logits/rejected": 7.5766754150390625,
|
| 1248 |
+
"logps/chosen": -431.0802307128906,
|
| 1249 |
+
"logps/rejected": -1220.452392578125,
|
| 1250 |
+
"loss": 1.1500192642211915,
|
| 1251 |
+
"rewards/accuracies": 0.9750000238418579,
|
| 1252 |
+
"rewards/chosen": -2.019794464111328,
|
| 1253 |
+
"rewards/margins": 152.16506958007812,
|
| 1254 |
+
"rewards/rejected": -154.18484497070312,
|
| 1255 |
+
"step": 830
|
| 1256 |
+
},
|
| 1257 |
+
{
|
| 1258 |
+
"epoch": 2.228647391159854,
|
| 1259 |
+
"grad_norm": 1.951496702049138e-18,
|
| 1260 |
+
"learning_rate": 1.7888888888888887e-07,
|
| 1261 |
+
"logits/chosen": 6.816000938415527,
|
| 1262 |
+
"logits/rejected": 7.375506401062012,
|
| 1263 |
+
"logps/chosen": -439.57891845703125,
|
| 1264 |
+
"logps/rejected": -1222.27001953125,
|
| 1265 |
+
"loss": 0.3972776889801025,
|
| 1266 |
+
"rewards/accuracies": 0.987500011920929,
|
| 1267 |
+
"rewards/chosen": 1.9101593494415283,
|
| 1268 |
+
"rewards/margins": 158.9185333251953,
|
| 1269 |
+
"rewards/rejected": -157.0083770751953,
|
| 1270 |
+
"step": 840
|
| 1271 |
+
},
|
| 1272 |
+
{
|
| 1273 |
+
"epoch": 2.255234297108674,
|
| 1274 |
+
"grad_norm": 2.449645117964328e-15,
|
| 1275 |
+
"learning_rate": 1.6777777777777778e-07,
|
| 1276 |
+
"logits/chosen": 7.166296482086182,
|
| 1277 |
+
"logits/rejected": 7.5857744216918945,
|
| 1278 |
+
"logps/chosen": -484.2479553222656,
|
| 1279 |
+
"logps/rejected": -1235.645263671875,
|
| 1280 |
+
"loss": 0.15833470821380616,
|
| 1281 |
+
"rewards/accuracies": 0.9750000238418579,
|
| 1282 |
+
"rewards/chosen": -0.6545869708061218,
|
| 1283 |
+
"rewards/margins": 155.0919952392578,
|
| 1284 |
+
"rewards/rejected": -155.74655151367188,
|
| 1285 |
+
"step": 850
|
| 1286 |
+
},
|
| 1287 |
+
{
|
| 1288 |
+
"epoch": 2.2818212030574943,
|
| 1289 |
+
"grad_norm": 67.49964141845703,
|
| 1290 |
+
"learning_rate": 1.5666666666666667e-07,
|
| 1291 |
+
"logits/chosen": 6.9471001625061035,
|
| 1292 |
+
"logits/rejected": 7.408398628234863,
|
| 1293 |
+
"logps/chosen": -406.9446105957031,
|
| 1294 |
+
"logps/rejected": -1206.536376953125,
|
| 1295 |
+
"loss": 0.3223508358001709,
|
| 1296 |
+
"rewards/accuracies": 0.9437500238418579,
|
| 1297 |
+
"rewards/chosen": 4.314828395843506,
|
| 1298 |
+
"rewards/margins": 160.91775512695312,
|
| 1299 |
+
"rewards/rejected": -156.60293579101562,
|
| 1300 |
+
"step": 860
|
| 1301 |
+
},
|
| 1302 |
+
{
|
| 1303 |
+
"epoch": 2.308408109006314,
|
| 1304 |
+
"grad_norm": 3.4588420021464117e-06,
|
| 1305 |
+
"learning_rate": 1.4555555555555555e-07,
|
| 1306 |
+
"logits/chosen": 6.990222930908203,
|
| 1307 |
+
"logits/rejected": 7.685202598571777,
|
| 1308 |
+
"logps/chosen": -426.66973876953125,
|
| 1309 |
+
"logps/rejected": -1176.889404296875,
|
| 1310 |
+
"loss": 0.8611475944519043,
|
| 1311 |
+
"rewards/accuracies": 0.9624999761581421,
|
| 1312 |
+
"rewards/chosen": 1.3860576152801514,
|
| 1313 |
+
"rewards/margins": 144.15267944335938,
|
| 1314 |
+
"rewards/rejected": -142.76663208007812,
|
| 1315 |
+
"step": 870
|
| 1316 |
+
},
|
| 1317 |
+
{
|
| 1318 |
+
"epoch": 2.3349950149551346,
|
| 1319 |
+
"grad_norm": 4.1328581182331625e-12,
|
| 1320 |
+
"learning_rate": 1.3444444444444444e-07,
|
| 1321 |
+
"logits/chosen": 7.256162166595459,
|
| 1322 |
+
"logits/rejected": 7.685450553894043,
|
| 1323 |
+
"logps/chosen": -462.0904846191406,
|
| 1324 |
+
"logps/rejected": -1166.178466796875,
|
| 1325 |
+
"loss": 0.024902737140655516,
|
| 1326 |
+
"rewards/accuracies": 0.987500011920929,
|
| 1327 |
+
"rewards/chosen": 1.212837815284729,
|
| 1328 |
+
"rewards/margins": 140.08041381835938,
|
| 1329 |
+
"rewards/rejected": -138.8675537109375,
|
| 1330 |
+
"step": 880
|
| 1331 |
+
},
|
| 1332 |
+
{
|
| 1333 |
+
"epoch": 2.361581920903955,
|
| 1334 |
+
"grad_norm": 22.725154876708984,
|
| 1335 |
+
"learning_rate": 1.2333333333333333e-07,
|
| 1336 |
+
"logits/chosen": 7.431256294250488,
|
| 1337 |
+
"logits/rejected": 7.865132808685303,
|
| 1338 |
+
"logps/chosen": -456.8827209472656,
|
| 1339 |
+
"logps/rejected": -1153.871337890625,
|
| 1340 |
+
"loss": 0.13207526206970216,
|
| 1341 |
+
"rewards/accuracies": 0.981249988079071,
|
| 1342 |
+
"rewards/chosen": 1.6622031927108765,
|
| 1343 |
+
"rewards/margins": 136.24082946777344,
|
| 1344 |
+
"rewards/rejected": -134.57862854003906,
|
| 1345 |
+
"step": 890
|
| 1346 |
+
},
|
| 1347 |
+
{
|
| 1348 |
+
"epoch": 2.388168826852775,
|
| 1349 |
+
"grad_norm": 132.83956909179688,
|
| 1350 |
+
"learning_rate": 1.1222222222222221e-07,
|
| 1351 |
+
"logits/chosen": 7.010849952697754,
|
| 1352 |
+
"logits/rejected": 7.441749572753906,
|
| 1353 |
+
"logps/chosen": -502.49371337890625,
|
| 1354 |
+
"logps/rejected": -1215.2733154296875,
|
| 1355 |
+
"loss": 0.5922121524810791,
|
| 1356 |
+
"rewards/accuracies": 0.9750000238418579,
|
| 1357 |
+
"rewards/chosen": -2.6824889183044434,
|
| 1358 |
+
"rewards/margins": 148.62466430664062,
|
| 1359 |
+
"rewards/rejected": -151.30715942382812,
|
| 1360 |
+
"step": 900
|
| 1361 |
+
},
|
| 1362 |
+
{
|
| 1363 |
+
"epoch": 2.4147557328015954,
|
| 1364 |
+
"grad_norm": 0.005245895590633154,
|
| 1365 |
+
"learning_rate": 1.011111111111111e-07,
|
| 1366 |
+
"logits/chosen": 6.980523109436035,
|
| 1367 |
+
"logits/rejected": 7.430232048034668,
|
| 1368 |
+
"logps/chosen": -470.41253662109375,
|
| 1369 |
+
"logps/rejected": -1160.4951171875,
|
| 1370 |
+
"loss": 1.195225143432617,
|
| 1371 |
+
"rewards/accuracies": 0.949999988079071,
|
| 1372 |
+
"rewards/chosen": -3.2118802070617676,
|
| 1373 |
+
"rewards/margins": 144.29278564453125,
|
| 1374 |
+
"rewards/rejected": -147.50466918945312,
|
| 1375 |
+
"step": 910
|
| 1376 |
+
},
|
| 1377 |
+
{
|
| 1378 |
+
"epoch": 2.4413426387504154,
|
| 1379 |
+
"grad_norm": 194.52578735351562,
|
| 1380 |
+
"learning_rate": 9e-08,
|
| 1381 |
+
"logits/chosen": 6.884810447692871,
|
| 1382 |
+
"logits/rejected": 7.503731727600098,
|
| 1383 |
+
"logps/chosen": -440.31976318359375,
|
| 1384 |
+
"logps/rejected": -1206.906494140625,
|
| 1385 |
+
"loss": 0.44526066780090334,
|
| 1386 |
+
"rewards/accuracies": 0.956250011920929,
|
| 1387 |
+
"rewards/chosen": 1.156048059463501,
|
| 1388 |
+
"rewards/margins": 149.27732849121094,
|
| 1389 |
+
"rewards/rejected": -148.1212921142578,
|
| 1390 |
+
"step": 920
|
| 1391 |
+
},
|
| 1392 |
+
{
|
| 1393 |
+
"epoch": 2.4679295446992358,
|
| 1394 |
+
"grad_norm": 1.5737574004387467e-14,
|
| 1395 |
+
"learning_rate": 7.888888888888889e-08,
|
| 1396 |
+
"logits/chosen": 7.322862148284912,
|
| 1397 |
+
"logits/rejected": 7.748003959655762,
|
| 1398 |
+
"logps/chosen": -508.33245849609375,
|
| 1399 |
+
"logps/rejected": -1189.603759765625,
|
| 1400 |
+
"loss": 0.18692436218261718,
|
| 1401 |
+
"rewards/accuracies": 0.9750000238418579,
|
| 1402 |
+
"rewards/chosen": 1.148808479309082,
|
| 1403 |
+
"rewards/margins": 146.20956420898438,
|
| 1404 |
+
"rewards/rejected": -145.06076049804688,
|
| 1405 |
+
"step": 930
|
| 1406 |
+
},
|
| 1407 |
+
{
|
| 1408 |
+
"epoch": 2.4945164506480557,
|
| 1409 |
+
"grad_norm": 0.22959347069263458,
|
| 1410 |
+
"learning_rate": 6.777777777777778e-08,
|
| 1411 |
+
"logits/chosen": 7.375940799713135,
|
| 1412 |
+
"logits/rejected": 7.710402011871338,
|
| 1413 |
+
"logps/chosen": -490.12384033203125,
|
| 1414 |
+
"logps/rejected": -1171.1483154296875,
|
| 1415 |
+
"loss": 0.27915282249450685,
|
| 1416 |
+
"rewards/accuracies": 0.9624999761581421,
|
| 1417 |
+
"rewards/chosen": 1.145845651626587,
|
| 1418 |
+
"rewards/margins": 140.29800415039062,
|
| 1419 |
+
"rewards/rejected": -139.1521453857422,
|
| 1420 |
+
"step": 940
|
| 1421 |
+
},
|
| 1422 |
+
{
|
| 1423 |
+
"epoch": 2.521103356596876,
|
| 1424 |
+
"grad_norm": 22.964818954467773,
|
| 1425 |
+
"learning_rate": 5.666666666666666e-08,
|
| 1426 |
+
"logits/chosen": 7.258917331695557,
|
| 1427 |
+
"logits/rejected": 7.766401767730713,
|
| 1428 |
+
"logps/chosen": -467.205322265625,
|
| 1429 |
+
"logps/rejected": -1157.4315185546875,
|
| 1430 |
+
"loss": 1.6006925582885743,
|
| 1431 |
+
"rewards/accuracies": 0.9437500238418579,
|
| 1432 |
+
"rewards/chosen": 0.41268739104270935,
|
| 1433 |
+
"rewards/margins": 135.27273559570312,
|
| 1434 |
+
"rewards/rejected": -134.86004638671875,
|
| 1435 |
+
"step": 950
|
| 1436 |
+
},
|
| 1437 |
+
{
|
| 1438 |
+
"epoch": 2.547690262545696,
|
| 1439 |
+
"grad_norm": 3.8648969441501535e-11,
|
| 1440 |
+
"learning_rate": 4.555555555555556e-08,
|
| 1441 |
+
"logits/chosen": 7.018073081970215,
|
| 1442 |
+
"logits/rejected": 7.558196067810059,
|
| 1443 |
+
"logps/chosen": -449.532958984375,
|
| 1444 |
+
"logps/rejected": -1138.4356689453125,
|
| 1445 |
+
"loss": 0.28522279262542727,
|
| 1446 |
+
"rewards/accuracies": 0.981249988079071,
|
| 1447 |
+
"rewards/chosen": -0.8609614372253418,
|
| 1448 |
+
"rewards/margins": 139.2249298095703,
|
| 1449 |
+
"rewards/rejected": -140.0858917236328,
|
| 1450 |
+
"step": 960
|
| 1451 |
+
},
|
| 1452 |
+
{
|
| 1453 |
+
"epoch": 2.5742771684945165,
|
| 1454 |
+
"grad_norm": 84.71375274658203,
|
| 1455 |
+
"learning_rate": 3.4444444444444444e-08,
|
| 1456 |
+
"logits/chosen": 7.130776405334473,
|
| 1457 |
+
"logits/rejected": 7.609295845031738,
|
| 1458 |
+
"logps/chosen": -438.42694091796875,
|
| 1459 |
+
"logps/rejected": -1249.6336669921875,
|
| 1460 |
+
"loss": 0.4750792980194092,
|
| 1461 |
+
"rewards/accuracies": 0.96875,
|
| 1462 |
+
"rewards/chosen": -1.273829460144043,
|
| 1463 |
+
"rewards/margins": 160.90731811523438,
|
| 1464 |
+
"rewards/rejected": -162.18113708496094,
|
| 1465 |
+
"step": 970
|
| 1466 |
+
},
|
| 1467 |
+
{
|
| 1468 |
+
"epoch": 2.6008640744433364,
|
| 1469 |
+
"grad_norm": 85.9113540649414,
|
| 1470 |
+
"learning_rate": 2.3333333333333334e-08,
|
| 1471 |
+
"logits/chosen": 7.113263130187988,
|
| 1472 |
+
"logits/rejected": 7.686596870422363,
|
| 1473 |
+
"logps/chosen": -434.325439453125,
|
| 1474 |
+
"logps/rejected": -1194.6849365234375,
|
| 1475 |
+
"loss": 0.33106160163879395,
|
| 1476 |
+
"rewards/accuracies": 0.9437500238418579,
|
| 1477 |
+
"rewards/chosen": -1.2038366794586182,
|
| 1478 |
+
"rewards/margins": 146.78440856933594,
|
| 1479 |
+
"rewards/rejected": -147.98825073242188,
|
| 1480 |
+
"step": 980
|
| 1481 |
+
},
|
| 1482 |
+
{
|
| 1483 |
+
"epoch": 2.627450980392157,
|
| 1484 |
+
"grad_norm": 2.4605165866986043e-20,
|
| 1485 |
+
"learning_rate": 1.2222222222222222e-08,
|
| 1486 |
+
"logits/chosen": 7.00030517578125,
|
| 1487 |
+
"logits/rejected": 7.477368354797363,
|
| 1488 |
+
"logps/chosen": -450.455078125,
|
| 1489 |
+
"logps/rejected": -1269.2520751953125,
|
| 1490 |
+
"loss": 0.2776132583618164,
|
| 1491 |
+
"rewards/accuracies": 0.981249988079071,
|
| 1492 |
+
"rewards/chosen": -3.280397891998291,
|
| 1493 |
+
"rewards/margins": 163.57626342773438,
|
| 1494 |
+
"rewards/rejected": -166.85665893554688,
|
| 1495 |
+
"step": 990
|
| 1496 |
+
},
|
| 1497 |
+
{
|
| 1498 |
+
"epoch": 2.6540378863409773,
|
| 1499 |
+
"grad_norm": 80.78559112548828,
|
| 1500 |
+
"learning_rate": 1.111111111111111e-09,
|
| 1501 |
+
"logits/chosen": 7.087013244628906,
|
| 1502 |
+
"logits/rejected": 7.507058620452881,
|
| 1503 |
+
"logps/chosen": -490.28857421875,
|
| 1504 |
+
"logps/rejected": -1223.248046875,
|
| 1505 |
+
"loss": 0.2815593719482422,
|
| 1506 |
+
"rewards/accuracies": 0.9750000238418579,
|
| 1507 |
+
"rewards/chosen": -2.167093276977539,
|
| 1508 |
+
"rewards/margins": 151.61813354492188,
|
| 1509 |
+
"rewards/rejected": -153.78524780273438,
|
| 1510 |
+
"step": 1000
|
| 1511 |
}
|
| 1512 |
],
|
| 1513 |
"logging_steps": 10,
|
|
|
|
| 1522 |
"should_evaluate": false,
|
| 1523 |
"should_log": false,
|
| 1524 |
"should_save": true,
|
| 1525 |
+
"should_training_stop": true
|
| 1526 |
},
|
| 1527 |
"attributes": {}
|
| 1528 |
}
|