Training in progress, epoch 11, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step805/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step805/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step805/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step805/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step805/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +335 -3
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 892897944
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e437eaee19bbd15e83d4a939cae73699cf7c04de79bc02cf69d3835fe0906e8f
|
| 3 |
size 892897944
|
last-checkpoint/global_step805/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0fe24ef50b1c15624fd2003a91f04fa978ea6c7fc7dae2feffb8609e4e5351bd
|
| 3 |
+
size 673148517
|
last-checkpoint/global_step805/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b3413c88245fa91d0398f4c5c91539ff255d7617af0bfd0b1ea4941e609b04f
|
| 3 |
+
size 673148581
|
last-checkpoint/global_step805/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b2c9af2053cea06a16992278b7c3a81170506faeafec1ee9c0d2f18e0e694eb
|
| 3 |
+
size 673148581
|
last-checkpoint/global_step805/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aac879faa1bf1eb50521a4fd4ad39dd6de95d6fde6bfcb6e12f4a1db23c80d78
|
| 3 |
+
size 673148581
|
last-checkpoint/global_step805/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dc19002c5db0266879cb308fc9e0413a78a9370073a901953e11c9950ad380c
|
| 3 |
+
size 893076569
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step805
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ff16acfda6bbc5cf50b99aa265031ad0b4a77a4ea996fe62e798c2b245c7d95
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a794a90df9c5b0631ebe2e7987dab57982f6a583f0010d55836d1074ee1a40d
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd6668393a5bc1b0de5d288b1691ef425f75f4c9702525195c76ff72f801ebb1
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f24d02dcb6989a54f9b8b9339108aafa5df8c99759987bdb33d7a9ef54169400
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:311eb9812176f51b258a368822f71b70f80b61465aa3d1733f9b098718c1a5ae
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2332,6 +2332,338 @@
|
|
| 2332 |
"eval_samples_per_second": 25.568,
|
| 2333 |
"eval_steps_per_second": 0.862,
|
| 2334 |
"step": 700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2335 |
}
|
| 2336 |
],
|
| 2337 |
"logging_steps": 5,
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.03328302875161171,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 11.587813620071685,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 800,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2332 |
"eval_samples_per_second": 25.568,
|
| 2333 |
"eval_steps_per_second": 0.862,
|
| 2334 |
"step": 700
|
| 2335 |
+
},
|
| 2336 |
+
{
|
| 2337 |
+
"epoch": 10.21505376344086,
|
| 2338 |
+
"grad_norm": 3.6100528240203857,
|
| 2339 |
+
"learning_rate": 6.638172079355048e-05,
|
| 2340 |
+
"logits/chosen": -1.1095702648162842,
|
| 2341 |
+
"logits/rejected": -1.202734351158142,
|
| 2342 |
+
"logps/chosen": -385.0,
|
| 2343 |
+
"logps/rejected": -644.2999877929688,
|
| 2344 |
+
"loss": 0.029,
|
| 2345 |
+
"rewards/accuracies": 0.984375,
|
| 2346 |
+
"rewards/chosen": -1.306646704673767,
|
| 2347 |
+
"rewards/margins": 14.65625,
|
| 2348 |
+
"rewards/rejected": -15.978124618530273,
|
| 2349 |
+
"step": 705
|
| 2350 |
+
},
|
| 2351 |
+
{
|
| 2352 |
+
"epoch": 10.28673835125448,
|
| 2353 |
+
"grad_norm": 0.639515221118927,
|
| 2354 |
+
"learning_rate": 6.637155458698307e-05,
|
| 2355 |
+
"logits/chosen": -1.1066405773162842,
|
| 2356 |
+
"logits/rejected": -1.2199218273162842,
|
| 2357 |
+
"logps/chosen": -375.1000061035156,
|
| 2358 |
+
"logps/rejected": -635.5999755859375,
|
| 2359 |
+
"loss": 0.0257,
|
| 2360 |
+
"rewards/accuracies": 0.981249988079071,
|
| 2361 |
+
"rewards/chosen": -1.794921875,
|
| 2362 |
+
"rewards/margins": 14.431249618530273,
|
| 2363 |
+
"rewards/rejected": -16.215625762939453,
|
| 2364 |
+
"step": 710
|
| 2365 |
+
},
|
| 2366 |
+
{
|
| 2367 |
+
"epoch": 10.3584229390681,
|
| 2368 |
+
"grad_norm": 1.197538137435913,
|
| 2369 |
+
"learning_rate": 6.63613056201972e-05,
|
| 2370 |
+
"logits/chosen": -1.065039038658142,
|
| 2371 |
+
"logits/rejected": -1.20703125,
|
| 2372 |
+
"logps/chosen": -403.79998779296875,
|
| 2373 |
+
"logps/rejected": -669.5999755859375,
|
| 2374 |
+
"loss": 0.0463,
|
| 2375 |
+
"rewards/accuracies": 0.9671875238418579,
|
| 2376 |
+
"rewards/chosen": -1.352148413658142,
|
| 2377 |
+
"rewards/margins": 16.912500381469727,
|
| 2378 |
+
"rewards/rejected": -18.253124237060547,
|
| 2379 |
+
"step": 715
|
| 2380 |
+
},
|
| 2381 |
+
{
|
| 2382 |
+
"epoch": 10.43010752688172,
|
| 2383 |
+
"grad_norm": 2.171043872833252,
|
| 2384 |
+
"learning_rate": 6.635097391899463e-05,
|
| 2385 |
+
"logits/chosen": -1.103515625,
|
| 2386 |
+
"logits/rejected": -1.211328148841858,
|
| 2387 |
+
"logps/chosen": -386.6000061035156,
|
| 2388 |
+
"logps/rejected": -628.5,
|
| 2389 |
+
"loss": 0.0346,
|
| 2390 |
+
"rewards/accuracies": 0.981249988079071,
|
| 2391 |
+
"rewards/chosen": -1.0,
|
| 2392 |
+
"rewards/margins": 16.143749237060547,
|
| 2393 |
+
"rewards/rejected": -17.115625381469727,
|
| 2394 |
+
"step": 720
|
| 2395 |
+
},
|
| 2396 |
+
{
|
| 2397 |
+
"epoch": 10.501792114695341,
|
| 2398 |
+
"grad_norm": 2.131438732147217,
|
| 2399 |
+
"learning_rate": 6.63405595093854e-05,
|
| 2400 |
+
"logits/chosen": -1.1662108898162842,
|
| 2401 |
+
"logits/rejected": -1.2726562023162842,
|
| 2402 |
+
"logps/chosen": -396.3999938964844,
|
| 2403 |
+
"logps/rejected": -647.2000122070312,
|
| 2404 |
+
"loss": 0.0233,
|
| 2405 |
+
"rewards/accuracies": 0.989062488079071,
|
| 2406 |
+
"rewards/chosen": -1.1624664068222046,
|
| 2407 |
+
"rewards/margins": 16.649999618530273,
|
| 2408 |
+
"rewards/rejected": -17.818750381469727,
|
| 2409 |
+
"step": 725
|
| 2410 |
+
},
|
| 2411 |
+
{
|
| 2412 |
+
"epoch": 10.57347670250896,
|
| 2413 |
+
"grad_norm": 1.5541068315505981,
|
| 2414 |
+
"learning_rate": 6.633006241758778e-05,
|
| 2415 |
+
"logits/chosen": -1.228124976158142,
|
| 2416 |
+
"logits/rejected": -1.3742187023162842,
|
| 2417 |
+
"logps/chosen": -400.70001220703125,
|
| 2418 |
+
"logps/rejected": -670.2999877929688,
|
| 2419 |
+
"loss": 0.0487,
|
| 2420 |
+
"rewards/accuracies": 0.965624988079071,
|
| 2421 |
+
"rewards/chosen": -2.663867235183716,
|
| 2422 |
+
"rewards/margins": 18.240625381469727,
|
| 2423 |
+
"rewards/rejected": -20.918750762939453,
|
| 2424 |
+
"step": 730
|
| 2425 |
+
},
|
| 2426 |
+
{
|
| 2427 |
+
"epoch": 10.64516129032258,
|
| 2428 |
+
"grad_norm": 4.768660545349121,
|
| 2429 |
+
"learning_rate": 6.631948267002819e-05,
|
| 2430 |
+
"logits/chosen": -1.2703125476837158,
|
| 2431 |
+
"logits/rejected": -1.368749976158142,
|
| 2432 |
+
"logps/chosen": -423.8999938964844,
|
| 2433 |
+
"logps/rejected": -721.2000122070312,
|
| 2434 |
+
"loss": 0.0192,
|
| 2435 |
+
"rewards/accuracies": 0.9906250238418579,
|
| 2436 |
+
"rewards/chosen": -3.223828077316284,
|
| 2437 |
+
"rewards/margins": 20.174999237060547,
|
| 2438 |
+
"rewards/rejected": -23.399999618530273,
|
| 2439 |
+
"step": 735
|
| 2440 |
+
},
|
| 2441 |
+
{
|
| 2442 |
+
"epoch": 10.716845878136201,
|
| 2443 |
+
"grad_norm": 1.4146541357040405,
|
| 2444 |
+
"learning_rate": 6.630882029334112e-05,
|
| 2445 |
+
"logits/chosen": -1.2531249523162842,
|
| 2446 |
+
"logits/rejected": -1.345312476158142,
|
| 2447 |
+
"logps/chosen": -412.0,
|
| 2448 |
+
"logps/rejected": -667.5,
|
| 2449 |
+
"loss": 0.0345,
|
| 2450 |
+
"rewards/accuracies": 0.973437488079071,
|
| 2451 |
+
"rewards/chosen": -2.1131834983825684,
|
| 2452 |
+
"rewards/margins": 17.715625762939453,
|
| 2453 |
+
"rewards/rejected": -19.828125,
|
| 2454 |
+
"step": 740
|
| 2455 |
+
},
|
| 2456 |
+
{
|
| 2457 |
+
"epoch": 10.78853046594982,
|
| 2458 |
+
"grad_norm": 10.958137512207031,
|
| 2459 |
+
"learning_rate": 6.62980753143691e-05,
|
| 2460 |
+
"logits/chosen": -1.258203148841858,
|
| 2461 |
+
"logits/rejected": -1.382421851158142,
|
| 2462 |
+
"logps/chosen": -396.1000061035156,
|
| 2463 |
+
"logps/rejected": -648.0999755859375,
|
| 2464 |
+
"loss": 0.5085,
|
| 2465 |
+
"rewards/accuracies": 0.9859374761581421,
|
| 2466 |
+
"rewards/chosen": -2.490673780441284,
|
| 2467 |
+
"rewards/margins": 16.428125381469727,
|
| 2468 |
+
"rewards/rejected": -18.918750762939453,
|
| 2469 |
+
"step": 745
|
| 2470 |
+
},
|
| 2471 |
+
{
|
| 2472 |
+
"epoch": 10.86021505376344,
|
| 2473 |
+
"grad_norm": 0.7889600396156311,
|
| 2474 |
+
"learning_rate": 6.62872477601626e-05,
|
| 2475 |
+
"logits/chosen": -1.3039062023162842,
|
| 2476 |
+
"logits/rejected": -1.4226562976837158,
|
| 2477 |
+
"logps/chosen": -410.3999938964844,
|
| 2478 |
+
"logps/rejected": -664.2000122070312,
|
| 2479 |
+
"loss": 0.2331,
|
| 2480 |
+
"rewards/accuracies": 0.987500011920929,
|
| 2481 |
+
"rewards/chosen": -2.001757860183716,
|
| 2482 |
+
"rewards/margins": 18.571874618530273,
|
| 2483 |
+
"rewards/rejected": -20.575000762939453,
|
| 2484 |
+
"step": 750
|
| 2485 |
+
},
|
| 2486 |
+
{
|
| 2487 |
+
"epoch": 10.86021505376344,
|
| 2488 |
+
"eval_logits/chosen": -1.3402777910232544,
|
| 2489 |
+
"eval_logits/rejected": -1.4592013359069824,
|
| 2490 |
+
"eval_logps/chosen": -408.4444580078125,
|
| 2491 |
+
"eval_logps/rejected": -681.3333129882812,
|
| 2492 |
+
"eval_loss": 0.04453478381037712,
|
| 2493 |
+
"eval_rewards/accuracies": 0.9725378751754761,
|
| 2494 |
+
"eval_rewards/chosen": -2.062066078186035,
|
| 2495 |
+
"eval_rewards/margins": 18.46527862548828,
|
| 2496 |
+
"eval_rewards/rejected": -20.52083396911621,
|
| 2497 |
+
"eval_runtime": 10.2105,
|
| 2498 |
+
"eval_samples_per_second": 26.149,
|
| 2499 |
+
"eval_steps_per_second": 0.881,
|
| 2500 |
+
"step": 750
|
| 2501 |
+
},
|
| 2502 |
+
{
|
| 2503 |
+
"epoch": 10.931899641577061,
|
| 2504 |
+
"grad_norm": 1.6667836904525757,
|
| 2505 |
+
"learning_rate": 6.627633765797999e-05,
|
| 2506 |
+
"logits/chosen": -1.3240234851837158,
|
| 2507 |
+
"logits/rejected": -1.429296851158142,
|
| 2508 |
+
"logps/chosen": -408.3999938964844,
|
| 2509 |
+
"logps/rejected": -681.4000244140625,
|
| 2510 |
+
"loss": 0.1956,
|
| 2511 |
+
"rewards/accuracies": 0.965624988079071,
|
| 2512 |
+
"rewards/chosen": -2.8973631858825684,
|
| 2513 |
+
"rewards/margins": 18.996875762939453,
|
| 2514 |
+
"rewards/rejected": -21.881250381469727,
|
| 2515 |
+
"step": 755
|
| 2516 |
+
},
|
| 2517 |
+
{
|
| 2518 |
+
"epoch": 11.014336917562725,
|
| 2519 |
+
"grad_norm": 2.1559181213378906,
|
| 2520 |
+
"learning_rate": 6.62653450352874e-05,
|
| 2521 |
+
"logits/chosen": -1.3860085010528564,
|
| 2522 |
+
"logits/rejected": -1.4886363744735718,
|
| 2523 |
+
"logps/chosen": -414.18182373046875,
|
| 2524 |
+
"logps/rejected": -683.6363525390625,
|
| 2525 |
+
"loss": 0.0331,
|
| 2526 |
+
"rewards/accuracies": 0.984375,
|
| 2527 |
+
"rewards/chosen": -3.476029872894287,
|
| 2528 |
+
"rewards/margins": 19.136363983154297,
|
| 2529 |
+
"rewards/rejected": -22.625,
|
| 2530 |
+
"step": 760
|
| 2531 |
+
},
|
| 2532 |
+
{
|
| 2533 |
+
"epoch": 11.086021505376344,
|
| 2534 |
+
"grad_norm": 0.5421841740608215,
|
| 2535 |
+
"learning_rate": 6.625426991975878e-05,
|
| 2536 |
+
"logits/chosen": -1.2683594226837158,
|
| 2537 |
+
"logits/rejected": -1.3828125,
|
| 2538 |
+
"logps/chosen": -449.20001220703125,
|
| 2539 |
+
"logps/rejected": -716.2000122070312,
|
| 2540 |
+
"loss": 0.0245,
|
| 2541 |
+
"rewards/accuracies": 0.981249988079071,
|
| 2542 |
+
"rewards/chosen": -3.512500047683716,
|
| 2543 |
+
"rewards/margins": 18.740625381469727,
|
| 2544 |
+
"rewards/rejected": -22.262500762939453,
|
| 2545 |
+
"step": 765
|
| 2546 |
+
},
|
| 2547 |
+
{
|
| 2548 |
+
"epoch": 11.157706093189963,
|
| 2549 |
+
"grad_norm": 0.014763603918254375,
|
| 2550 |
+
"learning_rate": 6.624311233927571e-05,
|
| 2551 |
+
"logits/chosen": -1.2734375,
|
| 2552 |
+
"logits/rejected": -1.381250023841858,
|
| 2553 |
+
"logps/chosen": -408.20001220703125,
|
| 2554 |
+
"logps/rejected": -683.7999877929688,
|
| 2555 |
+
"loss": 0.0215,
|
| 2556 |
+
"rewards/accuracies": 0.979687511920929,
|
| 2557 |
+
"rewards/chosen": -3.837890625,
|
| 2558 |
+
"rewards/margins": 17.071874618530273,
|
| 2559 |
+
"rewards/rejected": -20.912500381469727,
|
| 2560 |
+
"step": 770
|
| 2561 |
+
},
|
| 2562 |
+
{
|
| 2563 |
+
"epoch": 11.229390681003585,
|
| 2564 |
+
"grad_norm": 2.519441843032837,
|
| 2565 |
+
"learning_rate": 6.623187232192738e-05,
|
| 2566 |
+
"logits/chosen": -1.279687523841858,
|
| 2567 |
+
"logits/rejected": -1.396875023841858,
|
| 2568 |
+
"logps/chosen": -404.1000061035156,
|
| 2569 |
+
"logps/rejected": -662.2999877929688,
|
| 2570 |
+
"loss": 0.0389,
|
| 2571 |
+
"rewards/accuracies": 0.965624988079071,
|
| 2572 |
+
"rewards/chosen": -4.133593559265137,
|
| 2573 |
+
"rewards/margins": 17.243749618530273,
|
| 2574 |
+
"rewards/rejected": -21.375,
|
| 2575 |
+
"step": 775
|
| 2576 |
+
},
|
| 2577 |
+
{
|
| 2578 |
+
"epoch": 11.301075268817204,
|
| 2579 |
+
"grad_norm": 1.973528504371643,
|
| 2580 |
+
"learning_rate": 6.622054989601051e-05,
|
| 2581 |
+
"logits/chosen": -1.212890625,
|
| 2582 |
+
"logits/rejected": -1.360742211341858,
|
| 2583 |
+
"logps/chosen": -410.8999938964844,
|
| 2584 |
+
"logps/rejected": -691.4000244140625,
|
| 2585 |
+
"loss": 0.0222,
|
| 2586 |
+
"rewards/accuracies": 0.979687511920929,
|
| 2587 |
+
"rewards/chosen": -4.862500190734863,
|
| 2588 |
+
"rewards/margins": 16.665624618530273,
|
| 2589 |
+
"rewards/rejected": -21.518749237060547,
|
| 2590 |
+
"step": 780
|
| 2591 |
+
},
|
| 2592 |
+
{
|
| 2593 |
+
"epoch": 11.372759856630825,
|
| 2594 |
+
"grad_norm": 0.8458139896392822,
|
| 2595 |
+
"learning_rate": 6.620914509002932e-05,
|
| 2596 |
+
"logits/chosen": -1.215234398841858,
|
| 2597 |
+
"logits/rejected": -1.349609375,
|
| 2598 |
+
"logps/chosen": -403.29998779296875,
|
| 2599 |
+
"logps/rejected": -650.2999877929688,
|
| 2600 |
+
"loss": 0.0473,
|
| 2601 |
+
"rewards/accuracies": 0.973437488079071,
|
| 2602 |
+
"rewards/chosen": -3.1806640625,
|
| 2603 |
+
"rewards/margins": 16.887500762939453,
|
| 2604 |
+
"rewards/rejected": -20.071874618530273,
|
| 2605 |
+
"step": 785
|
| 2606 |
+
},
|
| 2607 |
+
{
|
| 2608 |
+
"epoch": 11.444444444444445,
|
| 2609 |
+
"grad_norm": 0.09396322816610336,
|
| 2610 |
+
"learning_rate": 6.619765793269539e-05,
|
| 2611 |
+
"logits/chosen": -1.1103515625,
|
| 2612 |
+
"logits/rejected": -1.233984351158142,
|
| 2613 |
+
"logps/chosen": -384.79998779296875,
|
| 2614 |
+
"logps/rejected": -670.5999755859375,
|
| 2615 |
+
"loss": 0.0192,
|
| 2616 |
+
"rewards/accuracies": 0.9859374761581421,
|
| 2617 |
+
"rewards/chosen": -2.9014649391174316,
|
| 2618 |
+
"rewards/margins": 16.671875,
|
| 2619 |
+
"rewards/rejected": -19.559375762939453,
|
| 2620 |
+
"step": 790
|
| 2621 |
+
},
|
| 2622 |
+
{
|
| 2623 |
+
"epoch": 11.516129032258064,
|
| 2624 |
+
"grad_norm": 0.5156263709068298,
|
| 2625 |
+
"learning_rate": 6.618608845292762e-05,
|
| 2626 |
+
"logits/chosen": -1.0205078125,
|
| 2627 |
+
"logits/rejected": -1.146875023841858,
|
| 2628 |
+
"logps/chosen": -389.8999938964844,
|
| 2629 |
+
"logps/rejected": -652.2000122070312,
|
| 2630 |
+
"loss": 0.0317,
|
| 2631 |
+
"rewards/accuracies": 0.9828125238418579,
|
| 2632 |
+
"rewards/chosen": -1.9188964366912842,
|
| 2633 |
+
"rewards/margins": 16.628124237060547,
|
| 2634 |
+
"rewards/rejected": -18.540624618530273,
|
| 2635 |
+
"step": 795
|
| 2636 |
+
},
|
| 2637 |
+
{
|
| 2638 |
+
"epoch": 11.587813620071685,
|
| 2639 |
+
"grad_norm": 2.2448863983154297,
|
| 2640 |
+
"learning_rate": 6.617443667985216e-05,
|
| 2641 |
+
"logits/chosen": -1.002539038658142,
|
| 2642 |
+
"logits/rejected": -1.141210913658142,
|
| 2643 |
+
"logps/chosen": -406.0,
|
| 2644 |
+
"logps/rejected": -656.4000244140625,
|
| 2645 |
+
"loss": 0.0225,
|
| 2646 |
+
"rewards/accuracies": 0.989062488079071,
|
| 2647 |
+
"rewards/chosen": -1.7773040533065796,
|
| 2648 |
+
"rewards/margins": 16.728124618530273,
|
| 2649 |
+
"rewards/rejected": -18.512500762939453,
|
| 2650 |
+
"step": 800
|
| 2651 |
+
},
|
| 2652 |
+
{
|
| 2653 |
+
"epoch": 11.587813620071685,
|
| 2654 |
+
"eval_logits/chosen": -1.0394965410232544,
|
| 2655 |
+
"eval_logits/rejected": -1.1527777910232544,
|
| 2656 |
+
"eval_logps/chosen": -404.0,
|
| 2657 |
+
"eval_logps/rejected": -672.888916015625,
|
| 2658 |
+
"eval_loss": 0.03328302875161171,
|
| 2659 |
+
"eval_rewards/accuracies": 0.9760100841522217,
|
| 2660 |
+
"eval_rewards/chosen": -1.6126302480697632,
|
| 2661 |
+
"eval_rewards/margins": 18.16666603088379,
|
| 2662 |
+
"eval_rewards/rejected": -19.8125,
|
| 2663 |
+
"eval_runtime": 10.2022,
|
| 2664 |
+
"eval_samples_per_second": 26.171,
|
| 2665 |
+
"eval_steps_per_second": 0.882,
|
| 2666 |
+
"step": 800
|
| 2667 |
}
|
| 2668 |
],
|
| 2669 |
"logging_steps": 5,
|