code-1b-aligned / last-checkpoint /trainer_state.json
rovdetection's picture
Training in progress, step 500, checkpoint
3a804b7 verified
Raw
History Blame Contribute Delete
32.9 kB
Invalid JSON:Unexpected token 'N', ..."_chosen": NaN, "... is not valid JSON
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.3616146092302129,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.007232292184604258,
"grad_norm": 0.24258430302143097,
"learning_rate": 4.977500000000001e-06,
"log_odds_chosen": 0.024490734562277794,
"log_odds_ratio": -0.8170725703239441,
"logits/chosen": -1.157371997833252,
"logits/rejected": -1.3074114322662354,
"logps/chosen": -5.802087306976318,
"logps/rejected": -5.824903964996338,
"loss": 5.930429458618164,
"nll_loss": 5.848721981048584,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.5802086591720581,
"rewards/margins": 0.002281700726598501,
"rewards/rejected": -0.5824903845787048,
"step": 10
},
{
"epoch": 0.014464584369208515,
"grad_norm": 0.2757987976074219,
"learning_rate": 4.9525000000000004e-06,
"log_odds_chosen": 0.07715226709842682,
"log_odds_ratio": -0.824821949005127,
"logits/chosen": -1.2339580059051514,
"logits/rejected": -1.3473726511001587,
"logps/chosen": -5.762179851531982,
"logps/rejected": -5.835549354553223,
"loss": 5.804796600341797,
"nll_loss": 5.722315311431885,
"rewards/accuracies": 0.49687498807907104,
"rewards/chosen": -0.5762180089950562,
"rewards/margins": 0.007336919195950031,
"rewards/rejected": -0.5835549235343933,
"step": 20
},
{
"epoch": 0.021696876553812774,
"grad_norm": 0.335886150598526,
"learning_rate": 4.927500000000001e-06,
"log_odds_chosen": 0.1334013044834137,
"log_odds_ratio": -0.7585476636886597,
"logits/chosen": -1.2495059967041016,
"logits/rejected": -1.377275824546814,
"logps/chosen": -5.738375663757324,
"logps/rejected": -5.869871616363525,
"loss": 5.848030853271484,
"nll_loss": 5.772176265716553,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.5738375782966614,
"rewards/margins": 0.013149544596672058,
"rewards/rejected": -0.5869871377944946,
"step": 30
},
{
"epoch": 0.02892916873841703,
"grad_norm": 0.3094758987426758,
"learning_rate": 4.902500000000001e-06,
"log_odds_chosen": NaN,
"log_odds_ratio": NaN,
"logits/chosen": -1.2641137838363647,
"logits/rejected": -1.3740476369857788,
"logps/chosen": -5.545978546142578,
"logps/rejected": NaN,
"loss": 6.1603240966796875,
"nll_loss": 5.585268974304199,
"rewards/accuracies": 0.5218750238418579,
"rewards/chosen": -0.5545979738235474,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 40
},
{
"epoch": 0.03616146092302129,
"grad_norm": 0.32100710272789,
"learning_rate": 4.8775e-06,
"log_odds_chosen": NaN,
"log_odds_ratio": NaN,
"logits/chosen": -1.30973219871521,
"logits/rejected": -1.4038114547729492,
"logps/chosen": NaN,
"logps/rejected": -5.679649829864502,
"loss": 5.871247863769531,
"nll_loss": 5.540013790130615,
"rewards/accuracies": 0.578125,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": -0.567965030670166,
"step": 50
},
{
"epoch": 0.04339375310762555,
"grad_norm": 0.37317097187042236,
"learning_rate": 4.8525000000000006e-06,
"log_odds_chosen": 0.08813583850860596,
"log_odds_ratio": -0.7889271974563599,
"logits/chosen": -1.2617380619049072,
"logits/rejected": -1.3692435026168823,
"logps/chosen": -5.557856559753418,
"logps/rejected": -5.643843173980713,
"loss": 5.632916259765625,
"nll_loss": 5.554023742675781,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.5557857155799866,
"rewards/margins": 0.008598615415394306,
"rewards/rejected": -0.5643843412399292,
"step": 60
},
{
"epoch": 0.050626045292229804,
"grad_norm": 0.2950762212276459,
"learning_rate": 4.827500000000001e-06,
"log_odds_chosen": 0.12244565784931183,
"log_odds_ratio": -0.7549425959587097,
"logits/chosen": -1.312510371208191,
"logits/rejected": -1.41781485080719,
"logps/chosen": -5.558300018310547,
"logps/rejected": -5.677781581878662,
"loss": 5.611359786987305,
"nll_loss": 5.535863876342773,
"rewards/accuracies": 0.5093749761581421,
"rewards/chosen": -0.5558300018310547,
"rewards/margins": 0.01194816268980503,
"rewards/rejected": -0.5677782297134399,
"step": 70
},
{
"epoch": 0.05785833747683406,
"grad_norm": 0.3010028600692749,
"learning_rate": 4.8025e-06,
"log_odds_chosen": 0.17995290458202362,
"log_odds_ratio": -0.7352453470230103,
"logits/chosen": -1.3281229734420776,
"logits/rejected": -1.4346697330474854,
"logps/chosen": -5.496776580810547,
"logps/rejected": -5.675050735473633,
"loss": 5.57300033569336,
"nll_loss": 5.499476432800293,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.5496777296066284,
"rewards/margins": 0.017827384173870087,
"rewards/rejected": -0.5675050616264343,
"step": 80
},
{
"epoch": 0.06509062966143832,
"grad_norm": 0.2564737796783447,
"learning_rate": 4.7775e-06,
"log_odds_chosen": 0.08067800104618073,
"log_odds_ratio": -0.757804274559021,
"logits/chosen": -1.2539831399917603,
"logits/rejected": -1.3801463842391968,
"logps/chosen": -5.533335208892822,
"logps/rejected": -5.611950874328613,
"loss": 5.607465744018555,
"nll_loss": 5.531683921813965,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.5533335208892822,
"rewards/margins": 0.007861590944230556,
"rewards/rejected": -0.5611951351165771,
"step": 90
},
{
"epoch": 0.07232292184604258,
"grad_norm": 0.2745928168296814,
"learning_rate": 4.752500000000001e-06,
"log_odds_chosen": NaN,
"log_odds_ratio": NaN,
"logits/chosen": -1.1503195762634277,
"logits/rejected": -1.3092117309570312,
"logps/chosen": -5.674561500549316,
"logps/rejected": NaN,
"loss": 5.882052612304688,
"nll_loss": 5.633638381958008,
"rewards/accuracies": 0.546875,
"rewards/chosen": -0.5674561262130737,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 100
},
{
"epoch": 0.07955521403064683,
"grad_norm": 0.22206123173236847,
"learning_rate": 4.7275e-06,
"log_odds_chosen": NaN,
"log_odds_ratio": NaN,
"logits/chosen": -1.3089696168899536,
"logits/rejected": -1.4873392581939697,
"logps/chosen": -5.392054557800293,
"logps/rejected": NaN,
"loss": 5.727831649780273,
"nll_loss": 5.414012908935547,
"rewards/accuracies": 0.515625,
"rewards/chosen": -0.5392054319381714,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 110
},
{
"epoch": 0.0867875062152511,
"grad_norm": 0.19849246740341187,
"learning_rate": 4.7025e-06,
"log_odds_chosen": 0.09243413805961609,
"log_odds_ratio": -0.7682880759239197,
"logits/chosen": -1.3715227842330933,
"logits/rejected": -1.4874814748764038,
"logps/chosen": -5.332463264465332,
"logps/rejected": -5.421862602233887,
"loss": 5.385763931274414,
"nll_loss": 5.308935642242432,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.5332463383674622,
"rewards/margins": 0.00893993116915226,
"rewards/rejected": -0.5421862602233887,
"step": 120
},
{
"epoch": 0.09401979839985536,
"grad_norm": 0.20989899337291718,
"learning_rate": 4.6775000000000005e-06,
"log_odds_chosen": 0.07261505722999573,
"log_odds_ratio": -0.7669461965560913,
"logits/chosen": -1.2831826210021973,
"logits/rejected": -1.3855401277542114,
"logps/chosen": -5.570550441741943,
"logps/rejected": -5.639805793762207,
"loss": 5.608541107177734,
"nll_loss": 5.531846046447754,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": -0.5570551156997681,
"rewards/margins": 0.006925526075065136,
"rewards/rejected": -0.5639805793762207,
"step": 130
},
{
"epoch": 0.10125209058445961,
"grad_norm": 0.24246586859226227,
"learning_rate": 4.652500000000001e-06,
"log_odds_chosen": 0.13198330998420715,
"log_odds_ratio": -0.7554014921188354,
"logits/chosen": -1.3736729621887207,
"logits/rejected": -1.4753162860870361,
"logps/chosen": -5.387119770050049,
"logps/rejected": -5.51568078994751,
"loss": 5.4450115203857425,
"nll_loss": 5.369471549987793,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.5387119650840759,
"rewards/margins": 0.01285608857870102,
"rewards/rejected": -0.5515680909156799,
"step": 140
},
{
"epoch": 0.10848438276906387,
"grad_norm": 0.22028212249279022,
"learning_rate": 4.6275e-06,
"log_odds_chosen": 0.1945369690656662,
"log_odds_ratio": -0.7150241732597351,
"logits/chosen": -1.222019076347351,
"logits/rejected": -1.3116670846939087,
"logps/chosen": -5.559757232666016,
"logps/rejected": -5.74977970123291,
"loss": 5.611997985839844,
"nll_loss": 5.540493965148926,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": -0.5559757351875305,
"rewards/margins": 0.01900230534374714,
"rewards/rejected": -0.5749779939651489,
"step": 150
},
{
"epoch": 0.11571667495366812,
"grad_norm": 0.24762941896915436,
"learning_rate": 4.6025e-06,
"log_odds_chosen": 0.12068144977092743,
"log_odds_ratio": -0.7480632066726685,
"logits/chosen": -1.2661784887313843,
"logits/rejected": -1.3999755382537842,
"logps/chosen": -5.444934844970703,
"logps/rejected": -5.563178062438965,
"loss": 5.528120040893555,
"nll_loss": 5.453313827514648,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.544493556022644,
"rewards/margins": 0.011824256740510464,
"rewards/rejected": -0.5563178062438965,
"step": 160
},
{
"epoch": 0.12294896713827239,
"grad_norm": 0.24410110712051392,
"learning_rate": 4.577500000000001e-06,
"log_odds_chosen": 0.07302852720022202,
"log_odds_ratio": -0.7880190014839172,
"logits/chosen": -1.2591315507888794,
"logits/rejected": -1.379748821258545,
"logps/chosen": -5.474093437194824,
"logps/rejected": -5.544507026672363,
"loss": 5.533624267578125,
"nll_loss": 5.454824447631836,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.5474093556404114,
"rewards/margins": 0.00704141054302454,
"rewards/rejected": -0.5544507503509521,
"step": 170
},
{
"epoch": 0.13018125932287664,
"grad_norm": 0.238921120762825,
"learning_rate": 4.5525e-06,
"log_odds_chosen": NaN,
"log_odds_ratio": NaN,
"logits/chosen": -1.2266502380371094,
"logits/rejected": -1.3530925512313843,
"logps/chosen": -5.5178141593933105,
"logps/rejected": NaN,
"loss": 5.79667854309082,
"nll_loss": 5.490872859954834,
"rewards/accuracies": 0.546875,
"rewards/chosen": -0.551781415939331,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 180
},
{
"epoch": 0.1374135515074809,
"grad_norm": 0.24503076076507568,
"learning_rate": 4.5275e-06,
"log_odds_chosen": 0.03844783455133438,
"log_odds_ratio": -0.7719030976295471,
"logits/chosen": -1.2923024892807007,
"logits/rejected": -1.4297457933425903,
"logps/chosen": -5.426344871520996,
"logps/rejected": -5.463438987731934,
"loss": 5.509374618530273,
"nll_loss": 5.432183265686035,
"rewards/accuracies": 0.4468750059604645,
"rewards/chosen": -0.5426343679428101,
"rewards/margins": 0.003709450364112854,
"rewards/rejected": -0.5463439226150513,
"step": 190
},
{
"epoch": 0.14464584369208516,
"grad_norm": 0.2474403828382492,
"learning_rate": 4.5025000000000005e-06,
"log_odds_chosen": 0.15352819859981537,
"log_odds_ratio": -0.7516843676567078,
"logits/chosen": -1.238797664642334,
"logits/rejected": -1.3516855239868164,
"logps/chosen": -5.482898235321045,
"logps/rejected": -5.631514549255371,
"loss": 5.482235336303711,
"nll_loss": 5.407067775726318,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.5482897758483887,
"rewards/margins": 0.01486161071807146,
"rewards/rejected": -0.563151478767395,
"step": 200
},
{
"epoch": 0.1518781358766894,
"grad_norm": 0.24647395312786102,
"learning_rate": 4.4775e-06,
"log_odds_chosen": 0.06791242212057114,
"log_odds_ratio": -0.7611157894134521,
"logits/chosen": -1.2040866613388062,
"logits/rejected": -1.325791835784912,
"logps/chosen": -5.527557373046875,
"logps/rejected": -5.593737602233887,
"loss": 5.5501853942871096,
"nll_loss": 5.4740729331970215,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.5527557134628296,
"rewards/margins": 0.00661806296557188,
"rewards/rejected": -0.5593737959861755,
"step": 210
},
{
"epoch": 0.15911042806129366,
"grad_norm": 0.3191209137439728,
"learning_rate": 4.4525e-06,
"log_odds_chosen": 0.11287051439285278,
"log_odds_ratio": -0.744554877281189,
"logits/chosen": -1.2233508825302124,
"logits/rejected": -1.334123134613037,
"logps/chosen": -5.541935920715332,
"logps/rejected": -5.6515703201293945,
"loss": 5.557551574707031,
"nll_loss": 5.483095169067383,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.5541935563087463,
"rewards/margins": 0.010963483713567257,
"rewards/rejected": -0.5651570558547974,
"step": 220
},
{
"epoch": 0.16634272024589794,
"grad_norm": 0.3177518844604492,
"learning_rate": 4.4275e-06,
"log_odds_chosen": NaN,
"log_odds_ratio": NaN,
"logits/chosen": -1.2862989902496338,
"logits/rejected": -1.396356225013733,
"logps/chosen": -5.400467872619629,
"logps/rejected": NaN,
"loss": 5.670994567871094,
"nll_loss": 5.330209255218506,
"rewards/accuracies": 0.503125011920929,
"rewards/chosen": -0.5400468111038208,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 230
},
{
"epoch": 0.1735750124305022,
"grad_norm": 0.2698371410369873,
"learning_rate": 4.4025e-06,
"log_odds_chosen": 0.10866693407297134,
"log_odds_ratio": -0.7732769250869751,
"logits/chosen": -1.2277987003326416,
"logits/rejected": -1.3508949279785156,
"logps/chosen": -5.505708694458008,
"logps/rejected": -5.6127214431762695,
"loss": 5.515193176269531,
"nll_loss": 5.437865257263184,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": -0.5505709052085876,
"rewards/margins": 0.010701271705329418,
"rewards/rejected": -0.5612722039222717,
"step": 240
},
{
"epoch": 0.18080730461510644,
"grad_norm": 0.2867446839809418,
"learning_rate": 4.3775e-06,
"log_odds_chosen": NaN,
"log_odds_ratio": NaN,
"logits/chosen": -1.2118570804595947,
"logits/rejected": -1.3736122846603394,
"logps/chosen": NaN,
"logps/rejected": -5.480542182922363,
"loss": 5.575833892822265,
"nll_loss": 5.324864387512207,
"rewards/accuracies": 0.528124988079071,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": -0.5480541586875916,
"step": 250
},
{
"epoch": 0.18803959679971072,
"grad_norm": 0.31495073437690735,
"learning_rate": 4.3525e-06,
"log_odds_chosen": 0.008091190829873085,
"log_odds_ratio": -0.8035072088241577,
"logits/chosen": -1.214800477027893,
"logits/rejected": -1.3568775653839111,
"logps/chosen": -5.552915096282959,
"logps/rejected": -5.560267925262451,
"loss": 5.492018127441407,
"nll_loss": 5.411666393280029,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.5552915334701538,
"rewards/margins": 0.00073523836908862,
"rewards/rejected": -0.5560267567634583,
"step": 260
},
{
"epoch": 0.19527188898431497,
"grad_norm": 0.3158721327781677,
"learning_rate": 4.3275000000000005e-06,
"log_odds_chosen": 0.12867891788482666,
"log_odds_ratio": -0.7282706499099731,
"logits/chosen": -1.1992931365966797,
"logits/rejected": -1.3094508647918701,
"logps/chosen": -5.397171497344971,
"logps/rejected": -5.522560119628906,
"loss": 5.501744079589844,
"nll_loss": 5.42891788482666,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.5397171378135681,
"rewards/margins": 0.012538868002593517,
"rewards/rejected": -0.5522559881210327,
"step": 270
},
{
"epoch": 0.20250418116891922,
"grad_norm": NaN,
"learning_rate": 4.302500000000001e-06,
"log_odds_chosen": NaN,
"log_odds_ratio": NaN,
"logits/chosen": -1.1986209154129028,
"logits/rejected": -1.325928807258606,
"logps/chosen": -5.435647964477539,
"logps/rejected": NaN,
"loss": 5.748141479492188,
"nll_loss": 5.416440010070801,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.5435648560523987,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 280
},
{
"epoch": 0.2097364733535235,
"grad_norm": 0.3696766197681427,
"learning_rate": 4.2775e-06,
"log_odds_chosen": -0.056973300874233246,
"log_odds_ratio": -0.8312661051750183,
"logits/chosen": -1.1497722864151,
"logits/rejected": -1.2519116401672363,
"logps/chosen": -5.443070888519287,
"logps/rejected": -5.3841233253479,
"loss": 5.432368469238281,
"nll_loss": 5.349241733551025,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.5443071126937866,
"rewards/margins": -0.005894799251109362,
"rewards/rejected": -0.5384122729301453,
"step": 290
},
{
"epoch": 0.21696876553812774,
"grad_norm": 0.4062056839466095,
"learning_rate": 4.2525e-06,
"log_odds_chosen": 0.06658594310283661,
"log_odds_ratio": -0.7710601687431335,
"logits/chosen": -1.152179479598999,
"logits/rejected": -1.2460296154022217,
"logps/chosen": -5.450706958770752,
"logps/rejected": -5.514741897583008,
"loss": 5.461360168457031,
"nll_loss": 5.384252548217773,
"rewards/accuracies": 0.484375,
"rewards/chosen": -0.5450707077980042,
"rewards/margins": 0.006403499282896519,
"rewards/rejected": -0.5514742136001587,
"step": 300
},
{
"epoch": 0.224201057722732,
"grad_norm": 0.35252928733825684,
"learning_rate": 4.227500000000001e-06,
"log_odds_chosen": NaN,
"log_odds_ratio": NaN,
"logits/chosen": -1.11289381980896,
"logits/rejected": -1.1899579763412476,
"logps/chosen": NaN,
"logps/rejected": -5.594590187072754,
"loss": 5.755791473388672,
"nll_loss": 5.4381585121154785,
"rewards/accuracies": 0.503125011920929,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": -0.5594589710235596,
"step": 310
},
{
"epoch": 0.23143334990733624,
"grad_norm": 0.32109105587005615,
"learning_rate": 4.202500000000001e-06,
"log_odds_chosen": NaN,
"log_odds_ratio": NaN,
"logits/chosen": -1.1014893054962158,
"logits/rejected": -1.2668919563293457,
"logps/chosen": NaN,
"logps/rejected": -5.533962249755859,
"loss": 5.670769500732422,
"nll_loss": 5.3555803298950195,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": -0.5533961653709412,
"step": 320
},
{
"epoch": 0.23866564209194052,
"grad_norm": 0.3987061083316803,
"learning_rate": 4.1775e-06,
"log_odds_chosen": 0.09470056742429733,
"log_odds_ratio": -0.7665299773216248,
"logits/chosen": -1.0869171619415283,
"logits/rejected": -1.1906062364578247,
"logps/chosen": -5.4382758140563965,
"logps/rejected": -5.529966831207275,
"loss": 5.484853363037109,
"nll_loss": 5.408199787139893,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.5438276529312134,
"rewards/margins": 0.009169066324830055,
"rewards/rejected": -0.5529965758323669,
"step": 330
},
{
"epoch": 0.24589793427654477,
"grad_norm": 0.3762986958026886,
"learning_rate": 4.1525000000000005e-06,
"log_odds_chosen": NaN,
"log_odds_ratio": NaN,
"logits/chosen": -1.0889309644699097,
"logits/rejected": -1.210901141166687,
"logps/chosen": NaN,
"logps/rejected": -5.512875556945801,
"loss": 5.707857513427735,
"nll_loss": 5.374106407165527,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": -0.5512875318527222,
"step": 340
},
{
"epoch": 0.253130226461149,
"grad_norm": 0.3539881706237793,
"learning_rate": 4.127500000000001e-06,
"log_odds_chosen": 0.05990752577781677,
"log_odds_ratio": -0.7520009279251099,
"logits/chosen": -1.1000460386276245,
"logits/rejected": -1.1998263597488403,
"logps/chosen": -5.449780464172363,
"logps/rejected": -5.5063323974609375,
"loss": 5.4952552795410154,
"nll_loss": 5.420053958892822,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.5449780225753784,
"rewards/margins": 0.005655230488628149,
"rewards/rejected": -0.5506333112716675,
"step": 350
},
{
"epoch": 0.26036251864575327,
"grad_norm": 0.4491257071495056,
"learning_rate": 4.1025e-06,
"log_odds_chosen": 0.0024302334059029818,
"log_odds_ratio": -0.803369402885437,
"logits/chosen": -1.1746547222137451,
"logits/rejected": -1.2685177326202393,
"logps/chosen": -5.358044624328613,
"logps/rejected": -5.358481407165527,
"loss": 5.37115707397461,
"nll_loss": 5.290821075439453,
"rewards/accuracies": 0.49687498807907104,
"rewards/chosen": -0.5358044505119324,
"rewards/margins": 4.368703957879916e-05,
"rewards/rejected": -0.5358482003211975,
"step": 360
},
{
"epoch": 0.2675948108303575,
"grad_norm": 0.4225010275840759,
"learning_rate": 4.0775e-06,
"log_odds_chosen": NaN,
"log_odds_ratio": NaN,
"logits/chosen": -1.1588351726531982,
"logits/rejected": -1.2908666133880615,
"logps/chosen": -5.33625602722168,
"logps/rejected": NaN,
"loss": 5.6588897705078125,
"nll_loss": 5.325121879577637,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": -0.533625602722168,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 370
},
{
"epoch": 0.2748271030149618,
"grad_norm": 0.39807629585266113,
"learning_rate": 4.052500000000001e-06,
"log_odds_chosen": 0.10123734176158905,
"log_odds_ratio": -0.7437968850135803,
"logits/chosen": -1.1793785095214844,
"logits/rejected": -1.3256503343582153,
"logps/chosen": -5.213901996612549,
"logps/rejected": -5.312169551849365,
"loss": 5.233790588378906,
"nll_loss": 5.159411430358887,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.5213901996612549,
"rewards/margins": 0.009826736524701118,
"rewards/rejected": -0.5312169790267944,
"step": 380
},
{
"epoch": 0.2820593951995661,
"grad_norm": 0.4834080934524536,
"learning_rate": 4.0275e-06,
"log_odds_chosen": 0.07742507755756378,
"log_odds_ratio": -0.7695199847221375,
"logits/chosen": -1.0857242345809937,
"logits/rejected": -1.2077770233154297,
"logps/chosen": -5.406026363372803,
"logps/rejected": -5.479708671569824,
"loss": 5.404788970947266,
"nll_loss": 5.3278374671936035,
"rewards/accuracies": 0.5218750238418579,
"rewards/chosen": -0.5406026840209961,
"rewards/margins": 0.007368179503828287,
"rewards/rejected": -0.5479708313941956,
"step": 390
},
{
"epoch": 0.2892916873841703,
"grad_norm": 0.4307977259159088,
"learning_rate": 4.0025e-06,
"log_odds_chosen": 0.13691337406635284,
"log_odds_ratio": -0.7147995233535767,
"logits/chosen": -1.1390448808670044,
"logits/rejected": -1.2907403707504272,
"logps/chosen": -5.265523433685303,
"logps/rejected": -5.39796257019043,
"loss": 5.341230010986328,
"nll_loss": 5.269749641418457,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.5265523791313171,
"rewards/margins": 0.013243894092738628,
"rewards/rejected": -0.5397962331771851,
"step": 400
},
{
"epoch": 0.2965239795687746,
"grad_norm": 0.40860849618911743,
"learning_rate": 3.9775000000000005e-06,
"log_odds_chosen": NaN,
"log_odds_ratio": NaN,
"logits/chosen": -1.1594798564910889,
"logits/rejected": -1.288938283920288,
"logps/chosen": -5.337071895599365,
"logps/rejected": NaN,
"loss": 5.633375549316407,
"nll_loss": 5.293572425842285,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.5337072014808655,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 410
},
{
"epoch": 0.3037562717533788,
"grad_norm": 0.5024765729904175,
"learning_rate": 3.9525e-06,
"log_odds_chosen": 0.14180947840213776,
"log_odds_ratio": -0.716079831123352,
"logits/chosen": -1.05372154712677,
"logits/rejected": -1.1723723411560059,
"logps/chosen": -5.377732753753662,
"logps/rejected": -5.514806747436523,
"loss": 5.390876770019531,
"nll_loss": 5.319269180297852,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": -0.5377733707427979,
"rewards/margins": 0.013707393780350685,
"rewards/rejected": -0.5514807105064392,
"step": 420
},
{
"epoch": 0.3109885639379831,
"grad_norm": 0.43874257802963257,
"learning_rate": 3.9275e-06,
"log_odds_chosen": 0.041213370859622955,
"log_odds_ratio": -0.7626296281814575,
"logits/chosen": -1.0877110958099365,
"logits/rejected": -1.2165416479110718,
"logps/chosen": -5.347620010375977,
"logps/rejected": -5.385829925537109,
"loss": 5.355945587158203,
"nll_loss": 5.279682636260986,
"rewards/accuracies": 0.49687498807907104,
"rewards/chosen": -0.5347620248794556,
"rewards/margins": 0.0038209576159715652,
"rewards/rejected": -0.538582980632782,
"step": 430
},
{
"epoch": 0.3182208561225873,
"grad_norm": 0.4173244535923004,
"learning_rate": 3.9025e-06,
"log_odds_chosen": 0.11863790452480316,
"log_odds_ratio": -0.7459925413131714,
"logits/chosen": -1.1519792079925537,
"logits/rejected": -1.3020139932632446,
"logps/chosen": -5.2671380043029785,
"logps/rejected": -5.38327693939209,
"loss": 5.298666381835938,
"nll_loss": 5.224067211151123,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.5267137885093689,
"rewards/margins": 0.011613896116614342,
"rewards/rejected": -0.538327693939209,
"step": 440
},
{
"epoch": 0.32545314830719163,
"grad_norm": 0.4220745861530304,
"learning_rate": 3.8775000000000006e-06,
"log_odds_chosen": NaN,
"log_odds_ratio": NaN,
"logits/chosen": -1.0332152843475342,
"logits/rejected": -1.1924374103546143,
"logps/chosen": NaN,
"logps/rejected": -5.533570766448975,
"loss": 5.6558387756347654,
"nll_loss": 5.354229927062988,
"rewards/accuracies": 0.528124988079071,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": -0.5533571243286133,
"step": 450
},
{
"epoch": 0.3326854404917959,
"grad_norm": 0.4184921085834503,
"learning_rate": 3.8525e-06,
"log_odds_chosen": 0.0849083662033081,
"log_odds_ratio": -0.7574716210365295,
"logits/chosen": -1.0905861854553223,
"logits/rejected": -1.18590247631073,
"logps/chosen": -5.311153888702393,
"logps/rejected": -5.390518665313721,
"loss": 5.285702133178711,
"nll_loss": 5.209954261779785,
"rewards/accuracies": 0.5218750238418579,
"rewards/chosen": -0.5311154127120972,
"rewards/margins": 0.007936512120068073,
"rewards/rejected": -0.53905189037323,
"step": 460
},
{
"epoch": 0.33991773267640013,
"grad_norm": 0.4340634047985077,
"learning_rate": 3.8275e-06,
"log_odds_chosen": 0.12307295948266983,
"log_odds_ratio": -0.7435885667800903,
"logits/chosen": -1.1616504192352295,
"logits/rejected": -1.275611162185669,
"logps/chosen": -5.210690498352051,
"logps/rejected": -5.329777717590332,
"loss": 5.172726058959961,
"nll_loss": 5.098366737365723,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.5210691094398499,
"rewards/margins": 0.011908676475286484,
"rewards/rejected": -0.5329777598381042,
"step": 470
},
{
"epoch": 0.3471500248610044,
"grad_norm": 0.5348623394966125,
"learning_rate": 3.8025e-06,
"log_odds_chosen": 0.10718987882137299,
"log_odds_ratio": -0.7553779482841492,
"logits/chosen": -1.069937825202942,
"logits/rejected": -1.2259876728057861,
"logps/chosen": -5.3289594650268555,
"logps/rejected": -5.430902004241943,
"loss": 5.337881469726563,
"nll_loss": 5.262343406677246,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.5328959822654724,
"rewards/margins": 0.010194242931902409,
"rewards/rejected": -0.5430902242660522,
"step": 480
},
{
"epoch": 0.35438231704560863,
"grad_norm": 0.5375458002090454,
"learning_rate": 3.7775000000000003e-06,
"log_odds_chosen": -0.0630793422460556,
"log_odds_ratio": -0.8402652740478516,
"logits/chosen": -1.0562111139297485,
"logits/rejected": -1.1850430965423584,
"logps/chosen": -5.353094100952148,
"logps/rejected": -5.288957595825195,
"loss": 5.370440673828125,
"nll_loss": 5.286414623260498,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.5353094339370728,
"rewards/margins": -0.006413729395717382,
"rewards/rejected": -0.5288957357406616,
"step": 490
},
{
"epoch": 0.3616146092302129,
"grad_norm": 0.5694707036018372,
"learning_rate": 3.7525e-06,
"log_odds_chosen": 0.1951710283756256,
"log_odds_ratio": -0.7369016408920288,
"logits/chosen": -1.1101913452148438,
"logits/rejected": -1.2623517513275146,
"logps/chosen": -5.364739418029785,
"logps/rejected": -5.554258823394775,
"loss": 5.355496597290039,
"nll_loss": 5.281806945800781,
"rewards/accuracies": 0.546875,
"rewards/chosen": -0.5364739298820496,
"rewards/margins": 0.018951958045363426,
"rewards/rejected": -0.5554260015487671,
"step": 500
}
],
"logging_steps": 10,
"max_steps": 2000,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}