{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9999067251189254,
"eval_steps": 500,
"global_step": 2680,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04999533625594627,
"grad_norm": 5.705146789550781,
"learning_rate": 5e-07,
"logits/chosen": -0.5462052822113037,
"logits/rejected": -0.46075063943862915,
"logps/chosen": -69.6404800415039,
"logps/rejected": -10.503483772277832,
"loss": 0.69,
"rewards/accuracies": 0.5541044473648071,
"rewards/chosen": 0.005677139386534691,
"rewards/margins": 0.006748478394001722,
"rewards/rejected": -0.0010713385418057442,
"step": 134
},
{
"epoch": 0.09999067251189254,
"grad_norm": 4.263132572174072,
"learning_rate": 1e-06,
"logits/chosen": -0.5453211665153503,
"logits/rejected": -0.4581734836101532,
"logps/chosen": -71.13265228271484,
"logps/rejected": -11.624285697937012,
"loss": 0.6205,
"rewards/accuracies": 0.9160447716712952,
"rewards/chosen": 0.14199481904506683,
"rewards/margins": 0.16203062236309052,
"rewards/rejected": -0.020035814493894577,
"step": 268
},
{
"epoch": 0.14998600876783882,
"grad_norm": 1.5373331308364868,
"learning_rate": 9.444444444444444e-07,
"logits/chosen": -0.545798122882843,
"logits/rejected": -0.4811278283596039,
"logps/chosen": -61.10158920288086,
"logps/rejected": -15.043923377990723,
"loss": 0.3159,
"rewards/accuracies": 0.9869402647018433,
"rewards/chosen": 0.9139772653579712,
"rewards/margins": 1.3145134449005127,
"rewards/rejected": -0.4005362391471863,
"step": 402
},
{
"epoch": 0.1999813450237851,
"grad_norm": 0.5485444068908691,
"learning_rate": 8.888888888888888e-07,
"logits/chosen": -0.5420589447021484,
"logits/rejected": -0.49538397789001465,
"logps/chosen": -52.57987976074219,
"logps/rejected": -23.705102920532227,
"loss": 0.1156,
"rewards/accuracies": 0.9906716346740723,
"rewards/chosen": 1.417620301246643,
"rewards/margins": 2.7980282306671143,
"rewards/rejected": -1.3804079294204712,
"step": 536
},
{
"epoch": 0.24997668127973136,
"grad_norm": 0.19278554618358612,
"learning_rate": 8.333333333333333e-07,
"logits/chosen": -0.5059043765068054,
"logits/rejected": -0.4366276264190674,
"logps/chosen": -55.41277313232422,
"logps/rejected": -34.412296295166016,
"loss": 0.05,
"rewards/accuracies": 0.9850745797157288,
"rewards/chosen": 1.6597167253494263,
"rewards/margins": 4.129410266876221,
"rewards/rejected": -2.469693660736084,
"step": 670
},
{
"epoch": 0.29997201753567765,
"grad_norm": 0.5303434729576111,
"learning_rate": 7.777777777777778e-07,
"logits/chosen": -0.4820927679538727,
"logits/rejected": -0.3845590353012085,
"logps/chosen": -52.056922912597656,
"logps/rejected": -41.084842681884766,
"loss": 0.0391,
"rewards/accuracies": 0.9860074520111084,
"rewards/chosen": 1.6465966701507568,
"rewards/margins": 4.822764873504639,
"rewards/rejected": -3.1761679649353027,
"step": 804
},
{
"epoch": 0.3499673537916239,
"grad_norm": 0.06953659653663635,
"learning_rate": 7.222222222222221e-07,
"logits/chosen": -0.4638223350048065,
"logits/rejected": -0.3506713807582855,
"logps/chosen": -52.924964904785156,
"logps/rejected": -46.31932830810547,
"loss": 0.0416,
"rewards/accuracies": 0.9850745797157288,
"rewards/chosen": 1.6556929349899292,
"rewards/margins": 5.256211757659912,
"rewards/rejected": -3.6005189418792725,
"step": 938
},
{
"epoch": 0.3999626900475702,
"grad_norm": 0.13936500251293182,
"learning_rate": 6.666666666666666e-07,
"logits/chosen": -0.4642048180103302,
"logits/rejected": -0.3347207307815552,
"logps/chosen": -51.326290130615234,
"logps/rejected": -47.818946838378906,
"loss": 0.03,
"rewards/accuracies": 0.9860074520111084,
"rewards/chosen": 1.7104003429412842,
"rewards/margins": 5.57481575012207,
"rewards/rejected": -3.864415407180786,
"step": 1072
},
{
"epoch": 0.44995802630351645,
"grad_norm": 0.10513754934072495,
"learning_rate": 6.111111111111112e-07,
"logits/chosen": -0.4259939193725586,
"logits/rejected": -0.29308220744132996,
"logps/chosen": -52.058380126953125,
"logps/rejected": -50.53205871582031,
"loss": 0.0353,
"rewards/accuracies": 0.9813432693481445,
"rewards/chosen": 1.7411428689956665,
"rewards/margins": 5.778336048126221,
"rewards/rejected": -4.037193775177002,
"step": 1206
},
{
"epoch": 0.4999533625594627,
"grad_norm": 15.070347785949707,
"learning_rate": 5.555555555555555e-07,
"logits/chosen": -0.4217334985733032,
"logits/rejected": -0.2778339684009552,
"logps/chosen": -49.258907318115234,
"logps/rejected": -52.084754943847656,
"loss": 0.0386,
"rewards/accuracies": 0.9785447716712952,
"rewards/chosen": 1.7135344743728638,
"rewards/margins": 5.933449745178223,
"rewards/rejected": -4.21991491317749,
"step": 1340
},
{
"epoch": 0.549948698815409,
"grad_norm": 0.07054832577705383,
"learning_rate": 5e-07,
"logits/chosen": -0.41517916321754456,
"logits/rejected": -0.25792089104652405,
"logps/chosen": -51.234615325927734,
"logps/rejected": -53.174564361572266,
"loss": 0.0263,
"rewards/accuracies": 0.9850745797157288,
"rewards/chosen": 1.7893245220184326,
"rewards/margins": 6.14929723739624,
"rewards/rejected": -4.3599724769592285,
"step": 1474
},
{
"epoch": 0.5999440350713553,
"grad_norm": 1.686726689338684,
"learning_rate": 4.444444444444444e-07,
"logits/chosen": -0.3858945369720459,
"logits/rejected": -0.22172169387340546,
"logps/chosen": -51.00803756713867,
"logps/rejected": -54.77092361450195,
"loss": 0.0446,
"rewards/accuracies": 0.9776118993759155,
"rewards/chosen": 1.7425472736358643,
"rewards/margins": 6.227110385894775,
"rewards/rejected": -4.48456335067749,
"step": 1608
},
{
"epoch": 0.6499393713273015,
"grad_norm": 6.180748462677002,
"learning_rate": 3.888888888888889e-07,
"logits/chosen": -0.3795308768749237,
"logits/rejected": -0.213688462972641,
"logps/chosen": -50.311119079589844,
"logps/rejected": -57.47030258178711,
"loss": 0.0178,
"rewards/accuracies": 0.9944029450416565,
"rewards/chosen": 1.7370685338974,
"rewards/margins": 6.562605381011963,
"rewards/rejected": -4.825536727905273,
"step": 1742
},
{
"epoch": 0.6999347075832478,
"grad_norm": 1.3855689764022827,
"learning_rate": 3.333333333333333e-07,
"logits/chosen": -0.37974509596824646,
"logits/rejected": -0.1997498720884323,
"logps/chosen": -54.77206802368164,
"logps/rejected": -58.50139617919922,
"loss": 0.0329,
"rewards/accuracies": 0.983208954334259,
"rewards/chosen": 1.8076502084732056,
"rewards/margins": 6.584301948547363,
"rewards/rejected": -4.776651859283447,
"step": 1876
},
{
"epoch": 0.7499300438391941,
"grad_norm": 3.921687364578247,
"learning_rate": 2.7777777777777776e-07,
"logits/chosen": -0.37658217549324036,
"logits/rejected": -0.20967237651348114,
"logps/chosen": -52.74870681762695,
"logps/rejected": -60.60834503173828,
"loss": 0.0284,
"rewards/accuracies": 0.9878731369972229,
"rewards/chosen": 1.7630212306976318,
"rewards/margins": 6.771360397338867,
"rewards/rejected": -5.008338928222656,
"step": 2010
},
{
"epoch": 0.7999253800951404,
"grad_norm": 0.07866105437278748,
"learning_rate": 2.222222222222222e-07,
"logits/chosen": -0.3648207485675812,
"logits/rejected": -0.1908709853887558,
"logps/chosen": -52.76331329345703,
"logps/rejected": -61.42716598510742,
"loss": 0.0236,
"rewards/accuracies": 0.9897387623786926,
"rewards/chosen": 1.758689522743225,
"rewards/margins": 6.900312900543213,
"rewards/rejected": -5.141623497009277,
"step": 2144
},
{
"epoch": 0.8499207163510867,
"grad_norm": 0.038292620331048965,
"learning_rate": 1.6666666666666665e-07,
"logits/chosen": -0.3632016181945801,
"logits/rejected": -0.20296621322631836,
"logps/chosen": -51.63236618041992,
"logps/rejected": -64.2586898803711,
"loss": 0.0385,
"rewards/accuracies": 0.9822760820388794,
"rewards/chosen": 1.6954851150512695,
"rewards/margins": 6.922590732574463,
"rewards/rejected": -5.227106094360352,
"step": 2278
},
{
"epoch": 0.8999160526070329,
"grad_norm": 0.05809802561998367,
"learning_rate": 1.111111111111111e-07,
"logits/chosen": -0.3890366554260254,
"logits/rejected": -0.21105322241783142,
"logps/chosen": -52.29930114746094,
"logps/rejected": -61.95232009887695,
"loss": 0.0161,
"rewards/accuracies": 0.9934701323509216,
"rewards/chosen": 1.7759125232696533,
"rewards/margins": 6.998130798339844,
"rewards/rejected": -5.2222185134887695,
"step": 2412
},
{
"epoch": 0.9499113888629792,
"grad_norm": 0.06595258414745331,
"learning_rate": 5.555555555555555e-08,
"logits/chosen": -0.37663742899894714,
"logits/rejected": -0.19746284186840057,
"logps/chosen": -52.22157669067383,
"logps/rejected": -63.86149597167969,
"loss": 0.0283,
"rewards/accuracies": 0.9822760820388794,
"rewards/chosen": 1.6980862617492676,
"rewards/margins": 7.005319118499756,
"rewards/rejected": -5.307232856750488,
"step": 2546
},
{
"epoch": 0.9999067251189254,
"grad_norm": 0.06881717592477798,
"learning_rate": 0.0,
"logits/chosen": -0.35994064807891846,
"logits/rejected": -0.19044946134090424,
"logps/chosen": -53.963340759277344,
"logps/rejected": -64.53665924072266,
"loss": 0.0188,
"rewards/accuracies": 0.9925373196601868,
"rewards/chosen": 1.7544020414352417,
"rewards/margins": 7.084873676300049,
"rewards/rejected": -5.330471515655518,
"step": 2680
},
{
"epoch": 0.9999067251189254,
"step": 2680,
"total_flos": 1.5261823890782945e+18,
"train_loss": 0.11258992188012422,
"train_runtime": 37304.1181,
"train_samples_per_second": 0.575,
"train_steps_per_second": 0.072
}
],
"logging_steps": 134,
"max_steps": 2680,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.5261823890782945e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}