{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9996283909327388,
"eval_steps": 500,
"global_step": 1345,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0505388331475288,
"grad_norm": 4.774968147277832,
"learning_rate": 5.037037037037037e-07,
"logits/chosen": -0.535763680934906,
"logits/rejected": -0.45358335971832275,
"logps/chosen": -72.03621673583984,
"logps/rejected": -10.970436096191406,
"loss": 0.6914,
"rewards/accuracies": 0.5533088445663452,
"rewards/chosen": 0.003953414969146252,
"rewards/margins": 0.003940199967473745,
"rewards/rejected": 1.321489253314212e-05,
"step": 68
},
{
"epoch": 0.1010776662950576,
"grad_norm": 7.106871128082275,
"learning_rate": 9.991735537190081e-07,
"logits/chosen": -0.5598275661468506,
"logits/rejected": -0.48031094670295715,
"logps/chosen": -67.91443634033203,
"logps/rejected": -10.759711265563965,
"loss": 0.6763,
"rewards/accuracies": 0.7647058963775635,
"rewards/chosen": 0.030388537794351578,
"rewards/margins": 0.03460656851530075,
"rewards/rejected": -0.004218032583594322,
"step": 136
},
{
"epoch": 0.1516164994425864,
"grad_norm": 6.630987167358398,
"learning_rate": 9.429752066115701e-07,
"logits/chosen": -0.5454370379447937,
"logits/rejected": -0.4578668177127838,
"logps/chosen": -65.27257537841797,
"logps/rejected": -11.141642570495605,
"loss": 0.5834,
"rewards/accuracies": 0.966911792755127,
"rewards/chosen": 0.20791733264923096,
"rewards/margins": 0.25071945786476135,
"rewards/rejected": -0.04280214384198189,
"step": 204
},
{
"epoch": 0.2021553325901152,
"grad_norm": 2.813685417175293,
"learning_rate": 8.867768595041321e-07,
"logits/chosen": -0.5535087585449219,
"logits/rejected": -0.48542195558547974,
"logps/chosen": -59.47464370727539,
"logps/rejected": -12.498270988464355,
"loss": 0.4096,
"rewards/accuracies": 0.9761029481887817,
"rewards/chosen": 0.5964955687522888,
"rewards/margins": 0.8128367066383362,
"rewards/rejected": -0.21634113788604736,
"step": 272
},
{
"epoch": 0.25269416573764397,
"grad_norm": 1.8204927444458008,
"learning_rate": 8.305785123966941e-07,
"logits/chosen": -0.5383437871932983,
"logits/rejected": -0.4763253629207611,
"logps/chosen": -59.065216064453125,
"logps/rejected": -15.607768058776855,
"loss": 0.2626,
"rewards/accuracies": 0.9816176295280457,
"rewards/chosen": 1.058206558227539,
"rewards/margins": 1.5873197317123413,
"rewards/rejected": -0.529113233089447,
"step": 340
},
{
"epoch": 0.3032329988851728,
"grad_norm": 2.656168222427368,
"learning_rate": 7.743801652892562e-07,
"logits/chosen": -0.5623682737350464,
"logits/rejected": -0.5051128268241882,
"logps/chosen": -57.070865631103516,
"logps/rejected": -18.777294158935547,
"loss": 0.1687,
"rewards/accuracies": 0.9871323704719543,
"rewards/chosen": 1.3186638355255127,
"rewards/margins": 2.2629363536834717,
"rewards/rejected": -0.9442727565765381,
"step": 408
},
{
"epoch": 0.35377183203270157,
"grad_norm": 1.3962138891220093,
"learning_rate": 7.181818181818181e-07,
"logits/chosen": -0.5668503046035767,
"logits/rejected": -0.5197104215621948,
"logps/chosen": -58.299644470214844,
"logps/rejected": -24.42568016052246,
"loss": 0.1139,
"rewards/accuracies": 0.9889705777168274,
"rewards/chosen": 1.4811238050460815,
"rewards/margins": 2.8274738788604736,
"rewards/rejected": -1.346349835395813,
"step": 476
},
{
"epoch": 0.4043106651802304,
"grad_norm": 4.664539337158203,
"learning_rate": 6.619834710743801e-07,
"logits/chosen": -0.5390637516975403,
"logits/rejected": -0.5045632719993591,
"logps/chosen": -49.41936492919922,
"logps/rejected": -27.90851593017578,
"loss": 0.077,
"rewards/accuracies": 0.9889705777168274,
"rewards/chosen": 1.4117194414138794,
"rewards/margins": 3.2265889644622803,
"rewards/rejected": -1.8148694038391113,
"step": 544
},
{
"epoch": 0.45484949832775917,
"grad_norm": 0.970925509929657,
"learning_rate": 6.057851239669421e-07,
"logits/chosen": -0.5126790404319763,
"logits/rejected": -0.46291089057922363,
"logps/chosen": -58.05442810058594,
"logps/rejected": -35.03934097290039,
"loss": 0.0496,
"rewards/accuracies": 0.9908088445663452,
"rewards/chosen": 1.6365827322006226,
"rewards/margins": 3.8830738067626953,
"rewards/rejected": -2.246490955352783,
"step": 612
},
{
"epoch": 0.5053883314752879,
"grad_norm": 1.9165253639221191,
"learning_rate": 5.49586776859504e-07,
"logits/chosen": -0.5267462730407715,
"logits/rejected": -0.45349758863449097,
"logps/chosen": -53.660301208496094,
"logps/rejected": -34.66725158691406,
"loss": 0.0412,
"rewards/accuracies": 0.9834558963775635,
"rewards/chosen": 1.7377012968063354,
"rewards/margins": 4.283178329467773,
"rewards/rejected": -2.5454771518707275,
"step": 680
},
{
"epoch": 0.5559271646228168,
"grad_norm": 0.5560820698738098,
"learning_rate": 4.933884297520661e-07,
"logits/chosen": -0.5102059841156006,
"logits/rejected": -0.44081857800483704,
"logps/chosen": -48.95499801635742,
"logps/rejected": -38.01549530029297,
"loss": 0.0338,
"rewards/accuracies": 0.9908088445663452,
"rewards/chosen": 1.5988441705703735,
"rewards/margins": 4.466281890869141,
"rewards/rejected": -2.8674376010894775,
"step": 748
},
{
"epoch": 0.6064659977703456,
"grad_norm": 0.654052734375,
"learning_rate": 4.3719008264462806e-07,
"logits/chosen": -0.5107941627502441,
"logits/rejected": -0.424625426530838,
"logps/chosen": -52.267120361328125,
"logps/rejected": -39.51735305786133,
"loss": 0.0403,
"rewards/accuracies": 0.9797794222831726,
"rewards/chosen": 1.5989586114883423,
"rewards/margins": 4.681400775909424,
"rewards/rejected": -3.0824427604675293,
"step": 816
},
{
"epoch": 0.6570048309178744,
"grad_norm": 0.2799667716026306,
"learning_rate": 3.8099173553719006e-07,
"logits/chosen": -0.48151201009750366,
"logits/rejected": -0.3872612416744232,
"logps/chosen": -51.13566970825195,
"logps/rejected": -41.436946868896484,
"loss": 0.0386,
"rewards/accuracies": 0.9852941036224365,
"rewards/chosen": 1.6831274032592773,
"rewards/margins": 4.950973033905029,
"rewards/rejected": -3.26784610748291,
"step": 884
},
{
"epoch": 0.7075436640654031,
"grad_norm": 3.401352882385254,
"learning_rate": 3.2479338842975206e-07,
"logits/chosen": -0.4963739216327667,
"logits/rejected": -0.397490918636322,
"logps/chosen": -51.522117614746094,
"logps/rejected": -42.51453399658203,
"loss": 0.0242,
"rewards/accuracies": 0.9908088445663452,
"rewards/chosen": 1.671505331993103,
"rewards/margins": 4.983243465423584,
"rewards/rejected": -3.3117384910583496,
"step": 952
},
{
"epoch": 0.758082497212932,
"grad_norm": 0.16179317235946655,
"learning_rate": 2.6859504132231406e-07,
"logits/chosen": -0.4788703918457031,
"logits/rejected": -0.3747369050979614,
"logps/chosen": -54.24635314941406,
"logps/rejected": -42.791847229003906,
"loss": 0.037,
"rewards/accuracies": 0.9852941036224365,
"rewards/chosen": 1.7351138591766357,
"rewards/margins": 5.101663112640381,
"rewards/rejected": -3.366548776626587,
"step": 1020
},
{
"epoch": 0.8086213303604608,
"grad_norm": 0.2579549252986908,
"learning_rate": 2.1239669421487603e-07,
"logits/chosen": -0.48607107996940613,
"logits/rejected": -0.3732473850250244,
"logps/chosen": -53.21406555175781,
"logps/rejected": -45.13155746459961,
"loss": 0.0328,
"rewards/accuracies": 0.9852941036224365,
"rewards/chosen": 1.671941876411438,
"rewards/margins": 5.195909023284912,
"rewards/rejected": -3.5239670276641846,
"step": 1088
},
{
"epoch": 0.8591601635079896,
"grad_norm": 0.4580838680267334,
"learning_rate": 1.56198347107438e-07,
"logits/chosen": -0.4816429018974304,
"logits/rejected": -0.36846473813056946,
"logps/chosen": -53.959617614746094,
"logps/rejected": -46.597286224365234,
"loss": 0.0252,
"rewards/accuracies": 0.9889705777168274,
"rewards/chosen": 1.7494579553604126,
"rewards/margins": 5.300571441650391,
"rewards/rejected": -3.5511131286621094,
"step": 1156
},
{
"epoch": 0.9096989966555183,
"grad_norm": 0.2617437243461609,
"learning_rate": 1e-07,
"logits/chosen": -0.48045316338539124,
"logits/rejected": -0.3751773536205292,
"logps/chosen": -53.45015335083008,
"logps/rejected": -45.071617126464844,
"loss": 0.0358,
"rewards/accuracies": 0.9834558963775635,
"rewards/chosen": 1.7436227798461914,
"rewards/margins": 5.314986705780029,
"rewards/rejected": -3.571363925933838,
"step": 1224
},
{
"epoch": 0.9602378298030472,
"grad_norm": 0.671177864074707,
"learning_rate": 4.3801652892561986e-08,
"logits/chosen": -0.47449061274528503,
"logits/rejected": -0.35607320070266724,
"logps/chosen": -55.56103515625,
"logps/rejected": -45.930023193359375,
"loss": 0.0315,
"rewards/accuracies": 0.9834558963775635,
"rewards/chosen": 1.7040444612503052,
"rewards/margins": 5.217987060546875,
"rewards/rejected": -3.5139424800872803,
"step": 1292
},
{
"epoch": 0.9996283909327388,
"step": 1345,
"total_flos": 7.57814156543656e+17,
"train_loss": 0.17186723207452484,
"train_runtime": 19211.7112,
"train_samples_per_second": 0.56,
"train_steps_per_second": 0.07
}
],
"logging_steps": 68,
"max_steps": 1345,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.57814156543656e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}