| { |
| "best_global_step": 100, |
| "best_metric": 0.6752368807792664, |
| "best_model_checkpoint": "models/dpo_fft_LFM2.5-1.2B-Instruct_argilla__distilabel-math-preference-dpo_20260222_210527/checkpoint-100", |
| "epoch": 2.7791304347826085, |
| "eval_steps": 100, |
| "global_step": 400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06956521739130435, |
| "grad_norm": 87.0, |
| "learning_rate": 4.090909090909091e-07, |
| "logits/chosen": -1.0601829290390015, |
| "logits/rejected": -1.0425456762313843, |
| "logps/chosen": -332.2013244628906, |
| "logps/rejected": -333.1183776855469, |
| "loss": 0.6823273181915284, |
| "rewards/accuracies": 0.42500001192092896, |
| "rewards/chosen": 0.015406012535095215, |
| "rewards/margins": 0.03173117712140083, |
| "rewards/rejected": -0.01632516458630562, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1391304347826087, |
| "grad_norm": 98.5, |
| "learning_rate": 8.636363636363636e-07, |
| "logits/chosen": -1.0965769290924072, |
| "logits/rejected": -1.0956510305404663, |
| "logps/chosen": -328.796875, |
| "logps/rejected": -312.0242919921875, |
| "loss": 0.6926839828491211, |
| "rewards/accuracies": 0.48750001192092896, |
| "rewards/chosen": 0.093757264316082, |
| "rewards/margins": 0.013911411166191101, |
| "rewards/rejected": 0.0798458456993103, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.20869565217391303, |
| "grad_norm": 96.5, |
| "learning_rate": 1.318181818181818e-06, |
| "logits/chosen": -1.1252676248550415, |
| "logits/rejected": -1.1598210334777832, |
| "logps/chosen": -326.04327392578125, |
| "logps/rejected": -303.9259033203125, |
| "loss": 0.7117842674255371, |
| "rewards/accuracies": 0.5062500238418579, |
| "rewards/chosen": 0.48031529784202576, |
| "rewards/margins": -0.010448494926095009, |
| "rewards/rejected": 0.49076375365257263, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2782608695652174, |
| "grad_norm": 109.0, |
| "learning_rate": 1.7727272727272727e-06, |
| "logits/chosen": -1.0572926998138428, |
| "logits/rejected": -1.069678544998169, |
| "logps/chosen": -333.5104064941406, |
| "logps/rejected": -322.76116943359375, |
| "loss": 0.721163272857666, |
| "rewards/accuracies": 0.5062500238418579, |
| "rewards/chosen": 1.2552604675292969, |
| "rewards/margins": 0.020199721679091454, |
| "rewards/rejected": 1.2350608110427856, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.34782608695652173, |
| "grad_norm": 106.0, |
| "learning_rate": 1.99918061692433e-06, |
| "logits/chosen": -1.116310954093933, |
| "logits/rejected": -1.126555323600769, |
| "logps/chosen": -325.90625, |
| "logps/rejected": -320.7261047363281, |
| "loss": 0.7112587451934814, |
| "rewards/accuracies": 0.518750011920929, |
| "rewards/chosen": 0.9580303430557251, |
| "rewards/margins": 0.02043265663087368, |
| "rewards/rejected": 0.9375975728034973, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.41739130434782606, |
| "grad_norm": 134.0, |
| "learning_rate": 1.992633606781968e-06, |
| "logits/chosen": -1.0915653705596924, |
| "logits/rejected": -1.0714164972305298, |
| "logps/chosen": -335.96258544921875, |
| "logps/rejected": -329.37567138671875, |
| "loss": 0.6888086795806885, |
| "rewards/accuracies": 0.5062500238418579, |
| "rewards/chosen": 0.24013535678386688, |
| "rewards/margins": 0.025822216644883156, |
| "rewards/rejected": 0.21431314945220947, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.48695652173913045, |
| "grad_norm": 168.0, |
| "learning_rate": 1.9795824849893477e-06, |
| "logits/chosen": -1.124298334121704, |
| "logits/rejected": -1.1153584718704224, |
| "logps/chosen": -319.74371337890625, |
| "logps/rejected": -317.81964111328125, |
| "loss": 0.7498865127563477, |
| "rewards/accuracies": 0.45625001192092896, |
| "rewards/chosen": 0.3042285442352295, |
| "rewards/margins": -0.07379330694675446, |
| "rewards/rejected": 0.37802186608314514, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5565217391304348, |
| "grad_norm": 93.5, |
| "learning_rate": 1.960112767443493e-06, |
| "logits/chosen": -1.1165910959243774, |
| "logits/rejected": -1.1083123683929443, |
| "logps/chosen": -314.81610107421875, |
| "logps/rejected": -312.41070556640625, |
| "loss": 0.67913818359375, |
| "rewards/accuracies": 0.581250011920929, |
| "rewards/chosen": 0.3251148760318756, |
| "rewards/margins": 0.07726944983005524, |
| "rewards/rejected": 0.24784541130065918, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6260869565217392, |
| "grad_norm": 97.5, |
| "learning_rate": 1.9343520271137762e-06, |
| "logits/chosen": -1.0576120615005493, |
| "logits/rejected": -1.0416970252990723, |
| "logps/chosen": -333.35565185546875, |
| "logps/rejected": -329.2746276855469, |
| "loss": 0.6899321556091309, |
| "rewards/accuracies": 0.5562499761581421, |
| "rewards/chosen": 1.0298190116882324, |
| "rewards/margins": 0.062107719480991364, |
| "rewards/rejected": 0.967711329460144, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 106.0, |
| "learning_rate": 1.9024690581354698e-06, |
| "logits/chosen": -1.0332655906677246, |
| "logits/rejected": -1.0259943008422852, |
| "logps/chosen": -327.9278564453125, |
| "logps/rejected": -320.8587951660156, |
| "loss": 0.6782574653625488, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 1.1065479516983032, |
| "rewards/margins": 0.09923191368579865, |
| "rewards/rejected": 1.007315993309021, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "eval_logits/chosen": -1.065671682357788, |
| "eval_logits/rejected": -1.0876761674880981, |
| "eval_logps/chosen": -315.0599670410156, |
| "eval_logps/rejected": -316.6776123046875, |
| "eval_loss": 0.6752368807792664, |
| "eval_rewards/accuracies": 0.5887096524238586, |
| "eval_rewards/chosen": 0.8812527060508728, |
| "eval_rewards/margins": 0.13870203495025635, |
| "eval_rewards/rejected": 0.7425506114959717, |
| "eval_runtime": 11.3291, |
| "eval_samples_per_second": 10.68, |
| "eval_steps_per_second": 2.736, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7652173913043478, |
| "grad_norm": 84.0, |
| "learning_rate": 1.8646727698065862e-06, |
| "logits/chosen": -1.0779330730438232, |
| "logits/rejected": -1.0829439163208008, |
| "logps/chosen": -314.15679931640625, |
| "logps/rejected": -308.0323181152344, |
| "loss": 0.6908653736114502, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": 0.6120839715003967, |
| "rewards/margins": 0.0896262526512146, |
| "rewards/rejected": 0.5224577188491821, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.8347826086956521, |
| "grad_norm": 101.0, |
| "learning_rate": 1.821210817734972e-06, |
| "logits/chosen": -1.0664002895355225, |
| "logits/rejected": -1.0612239837646484, |
| "logps/chosen": -334.7059020996094, |
| "logps/rejected": -321.7742919921875, |
| "loss": 0.7135319232940673, |
| "rewards/accuracies": 0.5062500238418579, |
| "rewards/chosen": 0.22731296718120575, |
| "rewards/margins": 0.016000976786017418, |
| "rewards/rejected": 0.21131198108196259, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.9043478260869565, |
| "grad_norm": 104.5, |
| "learning_rate": 1.7723679811048902e-06, |
| "logits/chosen": -1.093273401260376, |
| "logits/rejected": -1.0916509628295898, |
| "logps/chosen": -332.2840270996094, |
| "logps/rejected": -320.5323181152344, |
| "loss": 0.6850498199462891, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": 0.8852480053901672, |
| "rewards/margins": 0.07424825429916382, |
| "rewards/rejected": 0.8109996914863586, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.9739130434782609, |
| "grad_norm": 96.0, |
| "learning_rate": 1.7184642966958607e-06, |
| "logits/chosen": -1.065161943435669, |
| "logits/rejected": -1.0810632705688477, |
| "logps/chosen": -336.020751953125, |
| "logps/rejected": -311.6708984375, |
| "loss": 0.7298181056976318, |
| "rewards/accuracies": 0.518750011920929, |
| "rewards/chosen": 0.8573455810546875, |
| "rewards/margins": 0.014898905530571938, |
| "rewards/rejected": 0.842446506023407, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.0417391304347827, |
| "grad_norm": 169.0, |
| "learning_rate": 1.6598529618803698e-06, |
| "logits/chosen": -1.135772705078125, |
| "logits/rejected": -1.1369844675064087, |
| "logps/chosen": -297.4026794433594, |
| "logps/rejected": -306.18194580078125, |
| "loss": 0.5908462524414062, |
| "rewards/accuracies": 0.7435897588729858, |
| "rewards/chosen": 0.7982656359672546, |
| "rewards/margins": 0.3948451280593872, |
| "rewards/rejected": 0.4034205377101898, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.111304347826087, |
| "grad_norm": 88.0, |
| "learning_rate": 1.596918020340805e-06, |
| "logits/chosen": -1.084324836730957, |
| "logits/rejected": -1.083601951599121, |
| "logps/chosen": -326.7928771972656, |
| "logps/rejected": -318.2250671386719, |
| "loss": 0.4479428768157959, |
| "rewards/accuracies": 0.893750011920929, |
| "rewards/chosen": 0.5380933284759521, |
| "rewards/margins": 0.6464223861694336, |
| "rewards/rejected": -0.10832903534173965, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.1808695652173913, |
| "grad_norm": 67.0, |
| "learning_rate": 1.5300718456696778e-06, |
| "logits/chosen": -1.1100142002105713, |
| "logits/rejected": -1.1088769435882568, |
| "logps/chosen": -324.53009033203125, |
| "logps/rejected": -317.4223937988281, |
| "loss": 0.4346441745758057, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": 0.8458078503608704, |
| "rewards/margins": 0.7088065147399902, |
| "rewards/rejected": 0.13700127601623535, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.2504347826086957, |
| "grad_norm": 163.0, |
| "learning_rate": 1.4597524393415336e-06, |
| "logits/chosen": -1.1076725721359253, |
| "logits/rejected": -1.0804173946380615, |
| "logps/chosen": -330.8292541503906, |
| "logps/rejected": -318.1831359863281, |
| "loss": 0.47336974143981936, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 1.2574979066848755, |
| "rewards/margins": 0.669701874256134, |
| "rewards/rejected": 0.5877960920333862, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 67.5, |
| "learning_rate": 1.3864205607612647e-06, |
| "logits/chosen": -1.0184274911880493, |
| "logits/rejected": -1.0195646286010742, |
| "logps/chosen": -339.4798583984375, |
| "logps/rejected": -329.2326965332031, |
| "loss": 0.47516441345214844, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 1.4096896648406982, |
| "rewards/margins": 0.6121624112129211, |
| "rewards/rejected": 0.7975271940231323, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.3895652173913042, |
| "grad_norm": 63.25, |
| "learning_rate": 1.3105567081938423e-06, |
| "logits/chosen": -1.0496165752410889, |
| "logits/rejected": -1.0412036180496216, |
| "logps/chosen": -321.82830810546875, |
| "logps/rejected": -301.32464599609375, |
| "loss": 0.47269201278686523, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 1.2313692569732666, |
| "rewards/margins": 0.6157368421554565, |
| "rewards/rejected": 0.615632176399231, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.3895652173913042, |
| "eval_logits/chosen": -1.0566930770874023, |
| "eval_logits/rejected": -1.07687509059906, |
| "eval_logps/chosen": -314.4008483886719, |
| "eval_logps/rejected": -315.7554931640625, |
| "eval_loss": 0.7017911076545715, |
| "eval_rewards/accuracies": 0.5, |
| "eval_rewards/chosen": 1.0130723714828491, |
| "eval_rewards/margins": 0.08608859032392502, |
| "eval_rewards/rejected": 0.9269838929176331, |
| "eval_runtime": 11.2238, |
| "eval_samples_per_second": 10.781, |
| "eval_steps_per_second": 2.762, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.4591304347826086, |
| "grad_norm": 70.5, |
| "learning_rate": 1.2326579703575462e-06, |
| "logits/chosen": -1.0474871397018433, |
| "logits/rejected": -1.0500593185424805, |
| "logps/chosen": -324.48602294921875, |
| "logps/rejected": -322.1194763183594, |
| "loss": 0.4632419586181641, |
| "rewards/accuracies": 0.8812500238418579, |
| "rewards/chosen": 1.3502912521362305, |
| "rewards/margins": 0.6482532620429993, |
| "rewards/rejected": 0.7020379304885864, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.528695652173913, |
| "grad_norm": 104.5, |
| "learning_rate": 1.1532347693102631e-06, |
| "logits/chosen": -1.0756328105926514, |
| "logits/rejected": -1.0997965335845947, |
| "logps/chosen": -322.89508056640625, |
| "logps/rejected": -318.676025390625, |
| "loss": 0.4693108081817627, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": 0.9996536374092102, |
| "rewards/margins": 0.6115056276321411, |
| "rewards/rejected": 0.38814812898635864, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.5982608695652174, |
| "grad_norm": 68.0, |
| "learning_rate": 1.0728075159706879e-06, |
| "logits/chosen": -1.0858252048492432, |
| "logits/rejected": -1.0972968339920044, |
| "logps/chosen": -333.7071533203125, |
| "logps/rejected": -322.7843322753906, |
| "loss": 0.537294578552246, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.4641377329826355, |
| "rewards/margins": 0.43356814980506897, |
| "rewards/rejected": 0.030569633468985558, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.6678260869565218, |
| "grad_norm": 66.0, |
| "learning_rate": 9.919032001887214e-07, |
| "logits/chosen": -1.0909720659255981, |
| "logits/rejected": -1.0826283693313599, |
| "logps/chosen": -332.0724792480469, |
| "logps/rejected": -330.3124694824219, |
| "loss": 0.4591354846954346, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.42356061935424805, |
| "rewards/margins": 0.632544219493866, |
| "rewards/rejected": -0.2089836597442627, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.7373913043478262, |
| "grad_norm": 64.0, |
| "learning_rate": 9.110519377082173e-07, |
| "logits/chosen": -1.1404974460601807, |
| "logits/rejected": -1.1487622261047363, |
| "logps/chosen": -315.04705810546875, |
| "logps/rejected": -306.60357666015625, |
| "loss": 0.47364654541015627, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": 0.552635669708252, |
| "rewards/margins": 0.5962538719177246, |
| "rewards/rejected": -0.043618228286504745, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.8069565217391306, |
| "grad_norm": 96.0, |
| "learning_rate": 8.307834966476883e-07, |
| "logits/chosen": -1.1199188232421875, |
| "logits/rejected": -1.110740303993225, |
| "logps/chosen": -328.4118957519531, |
| "logps/rejected": -333.0620422363281, |
| "loss": 0.4562994956970215, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 0.5946794152259827, |
| "rewards/margins": 0.6635168790817261, |
| "rewards/rejected": -0.0688374936580658, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.8765217391304347, |
| "grad_norm": 97.0, |
| "learning_rate": 7.51623826258785e-07, |
| "logits/chosen": -1.1038875579833984, |
| "logits/rejected": -1.1073077917099, |
| "logps/chosen": -332.9511413574219, |
| "logps/rejected": -317.7924499511719, |
| "loss": 0.48169240951538084, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 0.6237157583236694, |
| "rewards/margins": 0.5657099485397339, |
| "rewards/rejected": 0.05800582095980644, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.9460869565217391, |
| "grad_norm": 76.5, |
| "learning_rate": 6.740916107074371e-07, |
| "logits/chosen": -1.0971988439559937, |
| "logits/rejected": -1.1136589050292969, |
| "logps/chosen": -331.86248779296875, |
| "logps/rejected": -324.9644470214844, |
| "loss": 0.4545170307159424, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": 0.8813239336013794, |
| "rewards/margins": 0.7066205143928528, |
| "rewards/rejected": 0.1747034788131714, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.013913043478261, |
| "grad_norm": 61.75, |
| "learning_rate": 5.986948704585895e-07, |
| "logits/chosen": -1.0902117490768433, |
| "logits/rejected": -1.0950541496276855, |
| "logps/chosen": -323.3013916015625, |
| "logps/rejected": -322.97393798828125, |
| "loss": 0.45575871467590334, |
| "rewards/accuracies": 0.8974359035491943, |
| "rewards/chosen": 0.7955907583236694, |
| "rewards/margins": 0.6638101935386658, |
| "rewards/rejected": 0.13178066909313202, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.0834782608695654, |
| "grad_norm": 56.25, |
| "learning_rate": 5.259276335335521e-07, |
| "logits/chosen": -1.111509084701538, |
| "logits/rejected": -1.138770341873169, |
| "logps/chosen": -332.07989501953125, |
| "logps/rejected": -317.6449279785156, |
| "loss": 0.3894503593444824, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": 0.806149959564209, |
| "rewards/margins": 0.8637164235115051, |
| "rewards/rejected": -0.05756649374961853, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.0834782608695654, |
| "eval_logits/chosen": -1.0786491632461548, |
| "eval_logits/rejected": -1.0995056629180908, |
| "eval_logps/chosen": -317.61407470703125, |
| "eval_logps/rejected": -318.8610534667969, |
| "eval_loss": 0.6876804232597351, |
| "eval_rewards/accuracies": 0.5645161271095276, |
| "eval_rewards/chosen": 0.37044042348861694, |
| "eval_rewards/margins": 0.06457632035017014, |
| "eval_rewards/rejected": 0.305864155292511, |
| "eval_runtime": 11.2588, |
| "eval_samples_per_second": 10.747, |
| "eval_steps_per_second": 2.753, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.1530434782608694, |
| "grad_norm": 59.25, |
| "learning_rate": 4.5626669845114154e-07, |
| "logits/chosen": -1.1131139993667603, |
| "logits/rejected": -1.0847505331039429, |
| "logps/chosen": -317.81744384765625, |
| "logps/rejected": -318.0986633300781, |
| "loss": 0.37284040451049805, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": 0.8261886835098267, |
| "rewards/margins": 0.8763921856880188, |
| "rewards/rejected": -0.05020345374941826, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.222608695652174, |
| "grad_norm": 66.0, |
| "learning_rate": 3.901685100630554e-07, |
| "logits/chosen": -1.0582597255706787, |
| "logits/rejected": -1.0536084175109863, |
| "logps/chosen": -323.5310974121094, |
| "logps/rejected": -329.77264404296875, |
| "loss": 0.40012392997741697, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": 0.7232468128204346, |
| "rewards/margins": 0.8045024871826172, |
| "rewards/rejected": -0.08125568181276321, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.292173913043478, |
| "grad_norm": 59.75, |
| "learning_rate": 3.2806616875418757e-07, |
| "logits/chosen": -1.0983725786209106, |
| "logits/rejected": -1.0878392457962036, |
| "logps/chosen": -315.97442626953125, |
| "logps/rejected": -314.21307373046875, |
| "loss": 0.4109466552734375, |
| "rewards/accuracies": 0.9312499761581421, |
| "rewards/chosen": 0.6401529908180237, |
| "rewards/margins": 0.7892977595329285, |
| "rewards/rejected": -0.14914488792419434, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.3617391304347826, |
| "grad_norm": 56.0, |
| "learning_rate": 2.7036659260473973e-07, |
| "logits/chosen": -1.0930839776992798, |
| "logits/rejected": -1.1155784130096436, |
| "logps/chosen": -338.72357177734375, |
| "logps/rejected": -327.84832763671875, |
| "loss": 0.39486031532287597, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": 0.7087287902832031, |
| "rewards/margins": 0.8493242263793945, |
| "rewards/rejected": -0.1405954360961914, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.431304347826087, |
| "grad_norm": 81.0, |
| "learning_rate": 2.174478511087171e-07, |
| "logits/chosen": -1.0855780839920044, |
| "logits/rejected": -1.074064016342163, |
| "logps/chosen": -330.82952880859375, |
| "logps/rejected": -330.74505615234375, |
| "loss": 0.39856863021850586, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": 0.7598364949226379, |
| "rewards/margins": 0.8086788058280945, |
| "rewards/rejected": -0.04884239286184311, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.5008695652173913, |
| "grad_norm": 68.0, |
| "learning_rate": 1.69656687919296e-07, |
| "logits/chosen": -1.0719494819641113, |
| "logits/rejected": -1.0662832260131836, |
| "logps/chosen": -337.28863525390625, |
| "logps/rejected": -320.1393127441406, |
| "loss": 0.38623409271240233, |
| "rewards/accuracies": 0.9437500238418579, |
| "rewards/chosen": 0.8269112706184387, |
| "rewards/margins": 0.8633332252502441, |
| "rewards/rejected": -0.036422014236450195, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.5704347826086957, |
| "grad_norm": 63.0, |
| "learning_rate": 1.2730624885297537e-07, |
| "logits/chosen": -1.0956530570983887, |
| "logits/rejected": -1.0990194082260132, |
| "logps/chosen": -330.1253356933594, |
| "logps/rejected": -323.8518371582031, |
| "loss": 0.3871379613876343, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.794479489326477, |
| "rewards/margins": 0.8608804941177368, |
| "rewards/rejected": -0.066400907933712, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.64, |
| "grad_norm": 64.0, |
| "learning_rate": 9.067403003948781e-08, |
| "logits/chosen": -1.1272144317626953, |
| "logits/rejected": -1.10252845287323, |
| "logps/chosen": -329.3648681640625, |
| "logps/rejected": -315.2191162109375, |
| "loss": 0.36964147090911864, |
| "rewards/accuracies": 0.9312499761581421, |
| "rewards/chosen": 0.839946448802948, |
| "rewards/margins": 0.9301446080207825, |
| "rewards/rejected": -0.09019814431667328, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.7095652173913045, |
| "grad_norm": 59.25, |
| "learning_rate": 6.000005966197386e-08, |
| "logits/chosen": -1.149733543395996, |
| "logits/rejected": -1.1311017274856567, |
| "logps/chosen": -317.32818603515625, |
| "logps/rejected": -311.5692138671875, |
| "loss": 0.3780661106109619, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": 0.7798963785171509, |
| "rewards/margins": 0.8840686082839966, |
| "rewards/rejected": -0.10417220741510391, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.7791304347826085, |
| "grad_norm": 43.0, |
| "learning_rate": 3.5485325201341284e-08, |
| "logits/chosen": -1.104060411453247, |
| "logits/rejected": -1.1139435768127441, |
| "logps/chosen": -325.05377197265625, |
| "logps/rejected": -323.31304931640625, |
| "loss": 0.40581889152526857, |
| "rewards/accuracies": 0.9437500238418579, |
| "rewards/chosen": 0.7068971991539001, |
| "rewards/margins": 0.8001037836074829, |
| "rewards/rejected": -0.09320656955242157, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.7791304347826085, |
| "eval_logits/chosen": -1.0804522037506104, |
| "eval_logits/rejected": -1.1010961532592773, |
| "eval_logps/chosen": -318.0834655761719, |
| "eval_logps/rejected": -319.351806640625, |
| "eval_loss": 0.6869162917137146, |
| "eval_rewards/accuracies": 0.5403226017951965, |
| "eval_rewards/chosen": 0.2765510380268097, |
| "eval_rewards/margins": 0.06884314864873886, |
| "eval_rewards/rejected": 0.20770789682865143, |
| "eval_runtime": 11.2322, |
| "eval_samples_per_second": 10.773, |
| "eval_steps_per_second": 2.76, |
| "step": 400 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 432, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|