| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.97196261682243, | |
| "eval_steps": 50, | |
| "global_step": 159, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.09345794392523364, | |
| "grad_norm": 66.9687943936917, | |
| "learning_rate": 5e-07, | |
| "logits/chosen": -2.720803737640381, | |
| "logits/rejected": -2.7183666229248047, | |
| "logps/chosen": -237.3436279296875, | |
| "logps/rejected": -190.54464721679688, | |
| "loss": 0.6913, | |
| "rewards/accuracies": 0.32499998807907104, | |
| "rewards/chosen": 0.013445606455206871, | |
| "rewards/margins": 0.008647488430142403, | |
| "rewards/rejected": 0.00479811942204833, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.18691588785046728, | |
| "grad_norm": 48.6419981873445, | |
| "learning_rate": 1e-06, | |
| "logits/chosen": -2.679405689239502, | |
| "logits/rejected": -2.670754909515381, | |
| "logps/chosen": -279.81866455078125, | |
| "logps/rejected": -226.22573852539062, | |
| "loss": 0.6556, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 0.4765666127204895, | |
| "rewards/margins": 0.12125828117132187, | |
| "rewards/rejected": 0.35530832409858704, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.2803738317757009, | |
| "grad_norm": 44.827387730520904, | |
| "learning_rate": 9.972240926774166e-07, | |
| "logits/chosen": -2.5193655490875244, | |
| "logits/rejected": -2.510051965713501, | |
| "logps/chosen": -236.1126708984375, | |
| "logps/rejected": -201.1164093017578, | |
| "loss": 0.6375, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.2661212682724, | |
| "rewards/margins": 0.5709505677223206, | |
| "rewards/rejected": 0.6951709985733032, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.37383177570093457, | |
| "grad_norm": 51.33633927747967, | |
| "learning_rate": 9.889271933555212e-07, | |
| "logits/chosen": -2.4093480110168457, | |
| "logits/rejected": -2.381843090057373, | |
| "logps/chosen": -258.9214782714844, | |
| "logps/rejected": -220.68408203125, | |
| "loss": 0.6632, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 1.2196061611175537, | |
| "rewards/margins": 0.9229635000228882, | |
| "rewards/rejected": 0.2966426610946655, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.4672897196261682, | |
| "grad_norm": 58.55983526769123, | |
| "learning_rate": 9.752014277286431e-07, | |
| "logits/chosen": -2.319462299346924, | |
| "logits/rejected": -2.303922653198242, | |
| "logps/chosen": -254.1189422607422, | |
| "logps/rejected": -196.4254608154297, | |
| "loss": 0.6806, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": 1.0977602005004883, | |
| "rewards/margins": 0.803708553314209, | |
| "rewards/rejected": 0.2940516173839569, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.5607476635514018, | |
| "grad_norm": 38.96884797085844, | |
| "learning_rate": 9.561992016100291e-07, | |
| "logits/chosen": -2.380964994430542, | |
| "logits/rejected": -2.357675075531006, | |
| "logps/chosen": -242.76406860351562, | |
| "logps/rejected": -221.53903198242188, | |
| "loss": 0.6103, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": 0.37621065974235535, | |
| "rewards/margins": 0.7926680445671082, | |
| "rewards/rejected": -0.4164574146270752, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.6542056074766355, | |
| "grad_norm": 49.029442328071866, | |
| "learning_rate": 9.321315086741915e-07, | |
| "logits/chosen": -2.3943734169006348, | |
| "logits/rejected": -2.386823892593384, | |
| "logps/chosen": -251.5531463623047, | |
| "logps/rejected": -226.2257537841797, | |
| "loss": 0.61, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.37054210901260376, | |
| "rewards/margins": 0.955781102180481, | |
| "rewards/rejected": -0.5852389931678772, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.7476635514018691, | |
| "grad_norm": 47.16621948348876, | |
| "learning_rate": 9.032655876613635e-07, | |
| "logits/chosen": -2.323800802230835, | |
| "logits/rejected": -2.296937942504883, | |
| "logps/chosen": -255.12149047851562, | |
| "logps/rejected": -216.43679809570312, | |
| "loss": 0.5697, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.7852829694747925, | |
| "rewards/margins": 1.0229356288909912, | |
| "rewards/rejected": -0.2376527041196823, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.8411214953271028, | |
| "grad_norm": 45.06757129062354, | |
| "learning_rate": 8.699219550575952e-07, | |
| "logits/chosen": -2.2773144245147705, | |
| "logits/rejected": -2.2689876556396484, | |
| "logps/chosen": -242.54403686523438, | |
| "logps/rejected": -198.2088165283203, | |
| "loss": 0.5913, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 1.0134313106536865, | |
| "rewards/margins": 0.8711115121841431, | |
| "rewards/rejected": 0.1423199325799942, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.9345794392523364, | |
| "grad_norm": 37.25379822397818, | |
| "learning_rate": 8.324708461985124e-07, | |
| "logits/chosen": -2.3120808601379395, | |
| "logits/rejected": -2.325521945953369, | |
| "logps/chosen": -244.45162963867188, | |
| "logps/rejected": -233.9671173095703, | |
| "loss": 0.5323, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.8873047828674316, | |
| "rewards/margins": 1.0542502403259277, | |
| "rewards/rejected": -0.16694557666778564, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.9345794392523364, | |
| "eval_logits/chosen": -2.3802032470703125, | |
| "eval_logits/rejected": -2.3511736392974854, | |
| "eval_logps/chosen": -257.5635070800781, | |
| "eval_logps/rejected": -242.5312957763672, | |
| "eval_loss": 0.5823682546615601, | |
| "eval_rewards/accuracies": 0.7395833134651184, | |
| "eval_rewards/chosen": 0.9205262660980225, | |
| "eval_rewards/margins": 1.2713056802749634, | |
| "eval_rewards/rejected": -0.35077938437461853, | |
| "eval_runtime": 101.9106, | |
| "eval_samples_per_second": 14.915, | |
| "eval_steps_per_second": 0.236, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.02803738317757, | |
| "grad_norm": 17.588268217174, | |
| "learning_rate": 7.913281043133977e-07, | |
| "logits/chosen": -2.377732038497925, | |
| "logits/rejected": -2.4019691944122314, | |
| "logps/chosen": -251.0818328857422, | |
| "logps/rejected": -231.4687957763672, | |
| "loss": 0.4699, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 1.248857021331787, | |
| "rewards/margins": 1.5204874277114868, | |
| "rewards/rejected": -0.2716304361820221, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 1.1214953271028036, | |
| "grad_norm": 17.6740148393279, | |
| "learning_rate": 7.469505631561317e-07, | |
| "logits/chosen": -2.4365363121032715, | |
| "logits/rejected": -2.3999149799346924, | |
| "logps/chosen": -235.5603790283203, | |
| "logps/rejected": -217.8879852294922, | |
| "loss": 0.2443, | |
| "rewards/accuracies": 0.893750011920929, | |
| "rewards/chosen": 1.6067253351211548, | |
| "rewards/margins": 2.39015531539917, | |
| "rewards/rejected": -0.7834302186965942, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.2149532710280373, | |
| "grad_norm": 19.25307975460043, | |
| "learning_rate": 6.998309744925411e-07, | |
| "logits/chosen": -2.459545612335205, | |
| "logits/rejected": -2.443091869354248, | |
| "logps/chosen": -234.2629852294922, | |
| "logps/rejected": -229.77243041992188, | |
| "loss": 0.2014, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/chosen": 2.0133259296417236, | |
| "rewards/margins": 3.0357606410980225, | |
| "rewards/rejected": -1.0224347114562988, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 1.308411214953271, | |
| "grad_norm": 18.33160467378068, | |
| "learning_rate": 6.504925367674594e-07, | |
| "logits/chosen": -2.5056710243225098, | |
| "logits/rejected": -2.481133222579956, | |
| "logps/chosen": -239.59219360351562, | |
| "logps/rejected": -222.05429077148438, | |
| "loss": 0.2154, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": 1.9632623195648193, | |
| "rewards/margins": 3.0751612186431885, | |
| "rewards/rejected": -1.1118988990783691, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.4018691588785046, | |
| "grad_norm": 26.100443454167603, | |
| "learning_rate": 5.994830857031499e-07, | |
| "logits/chosen": -2.4787604808807373, | |
| "logits/rejected": -2.477220058441162, | |
| "logps/chosen": -242.30496215820312, | |
| "logps/rejected": -246.5374298095703, | |
| "loss": 0.2167, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/chosen": 2.2068536281585693, | |
| "rewards/margins": 3.7736289501190186, | |
| "rewards/rejected": -1.5667749643325806, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.4953271028037383, | |
| "grad_norm": 23.947057848275417, | |
| "learning_rate": 5.473690113345342e-07, | |
| "logits/chosen": -2.4580140113830566, | |
| "logits/rejected": -2.4340569972991943, | |
| "logps/chosen": -232.0405731201172, | |
| "logps/rejected": -232.50894165039062, | |
| "loss": 0.2133, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 1.6286522150039673, | |
| "rewards/margins": 3.344123363494873, | |
| "rewards/rejected": -1.7154712677001953, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.588785046728972, | |
| "grad_norm": 25.173894130852474, | |
| "learning_rate": 4.947289690242102e-07, | |
| "logits/chosen": -2.3979992866516113, | |
| "logits/rejected": -2.372950553894043, | |
| "logps/chosen": -234.0653533935547, | |
| "logps/rejected": -226.2272491455078, | |
| "loss": 0.2526, | |
| "rewards/accuracies": 0.918749988079071, | |
| "rewards/chosen": 2.000070333480835, | |
| "rewards/margins": 3.399747371673584, | |
| "rewards/rejected": -1.3996769189834595, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.6822429906542056, | |
| "grad_norm": 25.381019801326666, | |
| "learning_rate": 4.421474542878194e-07, | |
| "logits/chosen": -2.402013063430786, | |
| "logits/rejected": -2.358625888824463, | |
| "logps/chosen": -240.19235229492188, | |
| "logps/rejected": -235.06942749023438, | |
| "loss": 0.2381, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": 2.031113624572754, | |
| "rewards/margins": 3.7032268047332764, | |
| "rewards/rejected": -1.6721128225326538, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.7757009345794392, | |
| "grad_norm": 22.605364041509517, | |
| "learning_rate": 3.902083127725186e-07, | |
| "logits/chosen": -2.3787314891815186, | |
| "logits/rejected": -2.382676601409912, | |
| "logps/chosen": -231.67434692382812, | |
| "logps/rejected": -207.5229949951172, | |
| "loss": 0.2348, | |
| "rewards/accuracies": 0.918749988079071, | |
| "rewards/chosen": 2.045062780380249, | |
| "rewards/margins": 3.560044765472412, | |
| "rewards/rejected": -1.5149818658828735, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.8691588785046729, | |
| "grad_norm": 26.859387828548858, | |
| "learning_rate": 3.394882574513519e-07, | |
| "logits/chosen": -2.3952929973602295, | |
| "logits/rejected": -2.3831348419189453, | |
| "logps/chosen": -234.7982940673828, | |
| "logps/rejected": -259.3247375488281, | |
| "loss": 0.2441, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 1.9878575801849365, | |
| "rewards/margins": 3.512976884841919, | |
| "rewards/rejected": -1.525119423866272, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.8691588785046729, | |
| "eval_logits/chosen": -2.3957111835479736, | |
| "eval_logits/rejected": -2.363419532775879, | |
| "eval_logps/chosen": -256.04901123046875, | |
| "eval_logps/rejected": -246.68414306640625, | |
| "eval_loss": 0.5841386318206787, | |
| "eval_rewards/accuracies": 0.7708333134651184, | |
| "eval_rewards/chosen": 1.071976661682129, | |
| "eval_rewards/margins": 1.8380416631698608, | |
| "eval_rewards/rejected": -0.7660649418830872, | |
| "eval_runtime": 100.9496, | |
| "eval_samples_per_second": 15.057, | |
| "eval_steps_per_second": 0.238, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.9626168224299065, | |
| "grad_norm": 30.452454043277406, | |
| "learning_rate": 2.9055046501619083e-07, | |
| "logits/chosen": -2.3725104331970215, | |
| "logits/rejected": -2.3521809577941895, | |
| "logps/chosen": -251.14065551757812, | |
| "logps/rejected": -223.05380249023438, | |
| "loss": 0.2356, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 2.067333459854126, | |
| "rewards/margins": 3.433384656906128, | |
| "rewards/rejected": -1.366051435470581, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 2.05607476635514, | |
| "grad_norm": 14.133552406481272, | |
| "learning_rate": 2.439383225725225e-07, | |
| "logits/chosen": -2.374899387359619, | |
| "logits/rejected": -2.3513777256011963, | |
| "logps/chosen": -229.90478515625, | |
| "logps/rejected": -230.5806884765625, | |
| "loss": 0.1857, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": 1.9912045001983643, | |
| "rewards/margins": 3.692650318145752, | |
| "rewards/rejected": -1.7014458179473877, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 2.149532710280374, | |
| "grad_norm": 11.316511114391215, | |
| "learning_rate": 2.0016939407046986e-07, | |
| "logits/chosen": -2.380131959915161, | |
| "logits/rejected": -2.3474698066711426, | |
| "logps/chosen": -223.51254272460938, | |
| "logps/rejected": -238.4824676513672, | |
| "loss": 0.1132, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 1.8329362869262695, | |
| "rewards/margins": 3.968013286590576, | |
| "rewards/rejected": -2.135077476501465, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 2.2429906542056073, | |
| "grad_norm": 19.797163444062072, | |
| "learning_rate": 1.5972967346655448e-07, | |
| "logits/chosen": -2.371460437774658, | |
| "logits/rejected": -2.36273193359375, | |
| "logps/chosen": -231.41104125976562, | |
| "logps/rejected": -190.1511993408203, | |
| "loss": 0.1337, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 2.0867326259613037, | |
| "rewards/margins": 3.7511298656463623, | |
| "rewards/rejected": -1.6643966436386108, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.336448598130841, | |
| "grad_norm": 13.870872057083783, | |
| "learning_rate": 1.2306818842696715e-07, | |
| "logits/chosen": -2.391444683074951, | |
| "logits/rejected": -2.373061180114746, | |
| "logps/chosen": -235.8319854736328, | |
| "logps/rejected": -220.6596221923828, | |
| "loss": 0.1358, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 2.6346383094787598, | |
| "rewards/margins": 4.137408256530762, | |
| "rewards/rejected": -1.502769112586975, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 2.4299065420560746, | |
| "grad_norm": 12.23774644882284, | |
| "learning_rate": 9.059201449082043e-08, | |
| "logits/chosen": -2.3761117458343506, | |
| "logits/rejected": -2.363049268722534, | |
| "logps/chosen": -219.3924560546875, | |
| "logps/rejected": -233.48605346679688, | |
| "loss": 0.1281, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": 2.4915733337402344, | |
| "rewards/margins": 4.301194190979004, | |
| "rewards/rejected": -1.8096210956573486, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 2.5233644859813085, | |
| "grad_norm": 18.769111889041984, | |
| "learning_rate": 6.266175505426957e-08, | |
| "logits/chosen": -2.3567309379577637, | |
| "logits/rejected": -2.3618130683898926, | |
| "logps/chosen": -228.6204071044922, | |
| "logps/rejected": -231.1812286376953, | |
| "loss": 0.1246, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 2.454102039337158, | |
| "rewards/margins": 4.2184247970581055, | |
| "rewards/rejected": -1.7643229961395264, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 2.616822429906542, | |
| "grad_norm": 16.78135239414831, | |
| "learning_rate": 3.958753736408105e-08, | |
| "logits/chosen": -2.389759063720703, | |
| "logits/rejected": -2.372283458709717, | |
| "logps/chosen": -221.2369842529297, | |
| "logps/rejected": -223.2550811767578, | |
| "loss": 0.1219, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 2.3341567516326904, | |
| "rewards/margins": 4.09852409362793, | |
| "rewards/rejected": -1.7643673419952393, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.710280373831776, | |
| "grad_norm": 15.32254073396557, | |
| "learning_rate": 2.162556897965101e-08, | |
| "logits/chosen": -2.3783352375030518, | |
| "logits/rejected": -2.360616445541382, | |
| "logps/chosen": -234.25350952148438, | |
| "logps/rejected": -253.4725799560547, | |
| "loss": 0.1048, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 2.511007308959961, | |
| "rewards/margins": 4.834166049957275, | |
| "rewards/rejected": -2.3231587409973145, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 2.803738317757009, | |
| "grad_norm": 14.732641015320004, | |
| "learning_rate": 8.975292939244927e-09, | |
| "logits/chosen": -2.3658361434936523, | |
| "logits/rejected": -2.349327564239502, | |
| "logps/chosen": -226.5908660888672, | |
| "logps/rejected": -223.129638671875, | |
| "loss": 0.1203, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 2.179231643676758, | |
| "rewards/margins": 4.109222412109375, | |
| "rewards/rejected": -1.9299901723861694, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.803738317757009, | |
| "eval_logits/chosen": -2.393800735473633, | |
| "eval_logits/rejected": -2.363922357559204, | |
| "eval_logps/chosen": -255.3956756591797, | |
| "eval_logps/rejected": -247.4010467529297, | |
| "eval_loss": 0.5898510813713074, | |
| "eval_rewards/accuracies": 0.7760416865348816, | |
| "eval_rewards/chosen": 1.1373103857040405, | |
| "eval_rewards/margins": 1.9750633239746094, | |
| "eval_rewards/rejected": -0.8377528190612793, | |
| "eval_runtime": 100.7549, | |
| "eval_samples_per_second": 15.086, | |
| "eval_steps_per_second": 0.238, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.897196261682243, | |
| "grad_norm": 12.576875515019212, | |
| "learning_rate": 1.7771732184357901e-09, | |
| "logits/chosen": -2.388183355331421, | |
| "logits/rejected": -2.389042615890503, | |
| "logps/chosen": -246.50845336914062, | |
| "logps/rejected": -217.6068878173828, | |
| "loss": 0.1134, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 2.572183132171631, | |
| "rewards/margins": 4.353964805603027, | |
| "rewards/rejected": -1.781781554222107, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 2.97196261682243, | |
| "step": 159, | |
| "total_flos": 1874604243025920.0, | |
| "train_loss": 0.33037466410570926, | |
| "train_runtime": 5730.1243, | |
| "train_samples_per_second": 7.16, | |
| "train_steps_per_second": 0.028 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 159, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1874604243025920.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |