| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 100, | |
| "global_step": 1589, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0031466331025802393, | |
| "grad_norm": 8.804184913635254, | |
| "learning_rate": 1.257861635220126e-06, | |
| "logits/chosen": -0.11013289541006088, | |
| "logits/rejected": -0.5208367109298706, | |
| "logps/chosen": -0.8537980914115906, | |
| "logps/rejected": -1.0550096035003662, | |
| "loss": 24.9985, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -5.359128408599645e-06, | |
| "rewards/margins": 1.545622944831848e-05, | |
| "rewards/rejected": -2.081535967590753e-05, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0062932662051604785, | |
| "grad_norm": 18.678768157958984, | |
| "learning_rate": 2.2641509433962266e-06, | |
| "logits/chosen": -0.3030635714530945, | |
| "logits/rejected": -0.5435053706169128, | |
| "logps/chosen": -0.9865642786026001, | |
| "logps/rejected": -1.107262372970581, | |
| "loss": 24.9967, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.00010868474782910198, | |
| "rewards/margins": 3.348257814650424e-05, | |
| "rewards/rejected": -0.00014216733688954264, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.009439899307740718, | |
| "grad_norm": 11.281435012817383, | |
| "learning_rate": 3.5220125786163524e-06, | |
| "logits/chosen": -0.5111545324325562, | |
| "logits/rejected": -0.8536307215690613, | |
| "logps/chosen": -1.0305876731872559, | |
| "logps/rejected": -1.2494089603424072, | |
| "loss": 24.9847, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.00031705142464488745, | |
| "rewards/margins": 0.000152838954818435, | |
| "rewards/rejected": -0.00046989036491140723, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.012586532410320957, | |
| "grad_norm": 59.5455322265625, | |
| "learning_rate": 4.528301886792453e-06, | |
| "logits/chosen": -0.616014838218689, | |
| "logits/rejected": -0.6851056218147278, | |
| "logps/chosen": -1.130916953086853, | |
| "logps/rejected": -1.46986985206604, | |
| "loss": 24.9645, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.0012707245768979192, | |
| "rewards/margins": 0.0003901523014064878, | |
| "rewards/rejected": -0.0016608769074082375, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.015733165512901194, | |
| "grad_norm": 9.532500267028809, | |
| "learning_rate": 5.786163522012579e-06, | |
| "logits/chosen": -0.12423186004161835, | |
| "logits/rejected": -0.4599896967411041, | |
| "logps/chosen": -0.8485546112060547, | |
| "logps/rejected": -1.0018525123596191, | |
| "loss": 24.9267, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.001064571551978588, | |
| "rewards/margins": 0.0007417487213388085, | |
| "rewards/rejected": -0.0018063202733173966, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.018879798615481436, | |
| "grad_norm": 11.064372062683105, | |
| "learning_rate": 7.044025157232705e-06, | |
| "logits/chosen": -0.1580429971218109, | |
| "logits/rejected": -0.38266992568969727, | |
| "logps/chosen": -0.8662201166152954, | |
| "logps/rejected": -1.0262982845306396, | |
| "loss": 24.8872, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.0021042851731181145, | |
| "rewards/margins": 0.0011668736115098, | |
| "rewards/rejected": -0.0032711587846279144, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.022026431718061675, | |
| "grad_norm": 37.646690368652344, | |
| "learning_rate": 8.301886792452832e-06, | |
| "logits/chosen": 0.026471847668290138, | |
| "logits/rejected": -0.49966034293174744, | |
| "logps/chosen": -0.8883110880851746, | |
| "logps/rejected": -1.199055790901184, | |
| "loss": 24.6732, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.003758195089176297, | |
| "rewards/margins": 0.0034271504264324903, | |
| "rewards/rejected": -0.0071853450499475, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.025173064820641914, | |
| "grad_norm": 34.88091278076172, | |
| "learning_rate": 9.559748427672956e-06, | |
| "logits/chosen": -0.36181551218032837, | |
| "logits/rejected": -0.6659843325614929, | |
| "logps/chosen": -0.9565097689628601, | |
| "logps/rejected": -1.183980941772461, | |
| "loss": 24.6032, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.007488996721804142, | |
| "rewards/margins": 0.004166100639849901, | |
| "rewards/rejected": -0.011655096895992756, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.028319697923222153, | |
| "grad_norm": 20.635541915893555, | |
| "learning_rate": 1.0817610062893083e-05, | |
| "logits/chosen": -0.5469181537628174, | |
| "logits/rejected": -0.7580572366714478, | |
| "logps/chosen": -1.0930149555206299, | |
| "logps/rejected": -1.2114075422286987, | |
| "loss": 24.7536, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.0149534298107028, | |
| "rewards/margins": 0.002928597154095769, | |
| "rewards/rejected": -0.017882030457258224, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.03146633102580239, | |
| "grad_norm": 36.45863723754883, | |
| "learning_rate": 1.2075471698113209e-05, | |
| "logits/chosen": -0.5085287094116211, | |
| "logits/rejected": -0.7208930253982544, | |
| "logps/chosen": -1.083438515663147, | |
| "logps/rejected": -1.3884985446929932, | |
| "loss": 23.9964, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.018525470048189163, | |
| "rewards/margins": 0.011683688499033451, | |
| "rewards/rejected": -0.03020915761590004, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.034612964128382634, | |
| "grad_norm": 45.08958053588867, | |
| "learning_rate": 1.3081761006289308e-05, | |
| "logits/chosen": -0.6235328912734985, | |
| "logits/rejected": -0.8463523983955383, | |
| "logps/chosen": -1.1567853689193726, | |
| "logps/rejected": -2.149567127227783, | |
| "loss": 23.8291, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.028398022055625916, | |
| "rewards/margins": 0.03046615794301033, | |
| "rewards/rejected": -0.05886417627334595, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.03775959723096287, | |
| "grad_norm": 48.65830612182617, | |
| "learning_rate": 1.408805031446541e-05, | |
| "logits/chosen": -0.6318017244338989, | |
| "logits/rejected": -0.9939996600151062, | |
| "logps/chosen": -1.7119739055633545, | |
| "logps/rejected": -2.3402199745178223, | |
| "loss": 23.5592, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.060618169605731964, | |
| "rewards/margins": 0.028203105553984642, | |
| "rewards/rejected": -0.08882127702236176, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.04090623033354311, | |
| "grad_norm": 138.4451904296875, | |
| "learning_rate": 1.5345911949685536e-05, | |
| "logits/chosen": -0.9717090725898743, | |
| "logits/rejected": -1.1959871053695679, | |
| "logps/chosen": -1.9082473516464233, | |
| "logps/rejected": -2.449486494064331, | |
| "loss": 22.7524, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.08517814427614212, | |
| "rewards/margins": 0.03464372828602791, | |
| "rewards/rejected": -0.11982186883687973, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.04405286343612335, | |
| "grad_norm": 49.257568359375, | |
| "learning_rate": 1.6603773584905664e-05, | |
| "logits/chosen": -0.7433441281318665, | |
| "logits/rejected": -1.0399134159088135, | |
| "logps/chosen": -2.255545139312744, | |
| "logps/rejected": -2.98321533203125, | |
| "loss": 23.4113, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.11126575618982315, | |
| "rewards/margins": 0.04789165034890175, | |
| "rewards/rejected": -0.159157395362854, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04719949653870359, | |
| "grad_norm": 56.168670654296875, | |
| "learning_rate": 1.7861635220125788e-05, | |
| "logits/chosen": -1.0234445333480835, | |
| "logits/rejected": -1.288999080657959, | |
| "logps/chosen": -1.653058648109436, | |
| "logps/rejected": -2.370941162109375, | |
| "loss": 22.181, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.08025868237018585, | |
| "rewards/margins": 0.044546954333782196, | |
| "rewards/rejected": -0.12480561435222626, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.05034612964128383, | |
| "grad_norm": null, | |
| "learning_rate": 1.8867924528301888e-05, | |
| "logits/chosen": -1.1835613250732422, | |
| "logits/rejected": -1.4036767482757568, | |
| "logps/chosen": -1.90883469581604, | |
| "logps/rejected": -2.1450889110565186, | |
| "loss": 25.8869, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.09280741214752197, | |
| "rewards/margins": 0.021463319659233093, | |
| "rewards/rejected": -0.11427073180675507, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.05349276274386407, | |
| "grad_norm": 103.32862091064453, | |
| "learning_rate": 2.0125786163522016e-05, | |
| "logits/chosen": -1.539898157119751, | |
| "logits/rejected": -1.6518356800079346, | |
| "logps/chosen": -2.0776684284210205, | |
| "logps/rejected": -2.5599067211151123, | |
| "loss": 24.1212, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.12567706406116486, | |
| "rewards/margins": 0.026400262489914894, | |
| "rewards/rejected": -0.152077317237854, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.056639395846444306, | |
| "grad_norm": 176.14784240722656, | |
| "learning_rate": 2.138364779874214e-05, | |
| "logits/chosen": -1.3818124532699585, | |
| "logits/rejected": -1.5843524932861328, | |
| "logps/chosen": -2.48514461517334, | |
| "logps/rejected": -2.8115108013153076, | |
| "loss": 26.3518, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.15766306221485138, | |
| "rewards/margins": 0.025023411959409714, | |
| "rewards/rejected": -0.1826864778995514, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.059786028949024544, | |
| "grad_norm": 106.15309143066406, | |
| "learning_rate": 2.2641509433962265e-05, | |
| "logits/chosen": -1.5876004695892334, | |
| "logits/rejected": -1.7525005340576172, | |
| "logps/chosen": -2.2529654502868652, | |
| "logps/rejected": -3.2197937965393066, | |
| "loss": 20.8074, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.14453086256980896, | |
| "rewards/margins": 0.07421146333217621, | |
| "rewards/rejected": -0.21874232590198517, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.06293266205160478, | |
| "grad_norm": 91.7872085571289, | |
| "learning_rate": 2.3899371069182393e-05, | |
| "logits/chosen": -1.6880552768707275, | |
| "logits/rejected": -1.667741060256958, | |
| "logps/chosen": -3.5453040599823, | |
| "logps/rejected": -3.8808798789978027, | |
| "loss": 24.6555, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.2320992648601532, | |
| "rewards/margins": 0.020265836268663406, | |
| "rewards/rejected": -0.2523651123046875, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06607929515418502, | |
| "grad_norm": 778.8959350585938, | |
| "learning_rate": 2.4905660377358492e-05, | |
| "logits/chosen": -1.8318984508514404, | |
| "logits/rejected": -1.8932411670684814, | |
| "logps/chosen": -3.125164031982422, | |
| "logps/rejected": -4.746774673461914, | |
| "loss": 27.3293, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.19288301467895508, | |
| "rewards/margins": 0.096275694668293, | |
| "rewards/rejected": -0.28915873169898987, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.06922592825676527, | |
| "grad_norm": 132.40354919433594, | |
| "learning_rate": 2.6163522012578617e-05, | |
| "logits/chosen": -1.7445348501205444, | |
| "logits/rejected": -1.902320146560669, | |
| "logps/chosen": -1.9325546026229858, | |
| "logps/rejected": -3.3019511699676514, | |
| "loss": 21.7317, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.10555150359869003, | |
| "rewards/margins": 0.07426220178604126, | |
| "rewards/rejected": -0.1798136979341507, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0723725613593455, | |
| "grad_norm": 98.48819732666016, | |
| "learning_rate": 2.742138364779874e-05, | |
| "logits/chosen": -1.7994951009750366, | |
| "logits/rejected": -1.9057296514511108, | |
| "logps/chosen": -2.1663613319396973, | |
| "logps/rejected": -2.82452392578125, | |
| "loss": 22.7429, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.1297454833984375, | |
| "rewards/margins": 0.04077299311757088, | |
| "rewards/rejected": -0.17051845788955688, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.07551919446192575, | |
| "grad_norm": 93.13198852539062, | |
| "learning_rate": 2.867924528301887e-05, | |
| "logits/chosen": -1.6606374979019165, | |
| "logits/rejected": -1.7864787578582764, | |
| "logps/chosen": -2.2936453819274902, | |
| "logps/rejected": -2.5704400539398193, | |
| "loss": 24.0989, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.12692785263061523, | |
| "rewards/margins": 0.020071204751729965, | |
| "rewards/rejected": -0.1469990611076355, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.07866582756450598, | |
| "grad_norm": 101.10535430908203, | |
| "learning_rate": 2.968553459119497e-05, | |
| "logits/chosen": -1.648816704750061, | |
| "logits/rejected": -1.6658546924591064, | |
| "logps/chosen": -2.0479884147644043, | |
| "logps/rejected": -2.8278560638427734, | |
| "loss": 27.9983, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.11854568868875504, | |
| "rewards/margins": 0.0439017117023468, | |
| "rewards/rejected": -0.16244739294052124, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.08181246066708622, | |
| "grad_norm": 92.93740844726562, | |
| "learning_rate": 3.09433962264151e-05, | |
| "logits/chosen": -1.7306410074234009, | |
| "logits/rejected": -1.8349339962005615, | |
| "logps/chosen": -2.082920551300049, | |
| "logps/rejected": -3.115952253341675, | |
| "loss": 23.5299, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.11979808658361435, | |
| "rewards/margins": 0.06730449199676514, | |
| "rewards/rejected": -0.18710258603096008, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.08495909376966645, | |
| "grad_norm": 123.31324768066406, | |
| "learning_rate": 3.220125786163522e-05, | |
| "logits/chosen": -1.8235572576522827, | |
| "logits/rejected": -1.8541405200958252, | |
| "logps/chosen": -1.9667946100234985, | |
| "logps/rejected": -2.772089958190918, | |
| "loss": 22.6137, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.10746946185827255, | |
| "rewards/margins": 0.04866869002580643, | |
| "rewards/rejected": -0.15613815188407898, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.0881057268722467, | |
| "grad_norm": 126.42218017578125, | |
| "learning_rate": 3.345911949685535e-05, | |
| "logits/chosen": -1.674515962600708, | |
| "logits/rejected": -1.8894662857055664, | |
| "logps/chosen": -2.245245933532715, | |
| "logps/rejected": -3.0301966667175293, | |
| "loss": 22.6984, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.11901465803384781, | |
| "rewards/margins": 0.049767203629016876, | |
| "rewards/rejected": -0.16878187656402588, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.09125235997482693, | |
| "grad_norm": 114.61023712158203, | |
| "learning_rate": 3.471698113207548e-05, | |
| "logits/chosen": -1.7905619144439697, | |
| "logits/rejected": -1.8821656703948975, | |
| "logps/chosen": -3.373708724975586, | |
| "logps/rejected": -4.691153526306152, | |
| "loss": 22.2353, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.18414758145809174, | |
| "rewards/margins": 0.0776277631521225, | |
| "rewards/rejected": -0.26177531480789185, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.09439899307740718, | |
| "grad_norm": 296.22955322265625, | |
| "learning_rate": 3.59748427672956e-05, | |
| "logits/chosen": -1.654166579246521, | |
| "logits/rejected": -1.847495436668396, | |
| "logps/chosen": -3.2497410774230957, | |
| "logps/rejected": -4.303386688232422, | |
| "loss": 20.9992, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.22493870556354523, | |
| "rewards/margins": 0.07029641419649124, | |
| "rewards/rejected": -0.2952350974082947, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.09754562617998741, | |
| "grad_norm": 579.211669921875, | |
| "learning_rate": 3.7232704402515726e-05, | |
| "logits/chosen": -1.6689754724502563, | |
| "logits/rejected": -1.7173473834991455, | |
| "logps/chosen": -3.7695910930633545, | |
| "logps/rejected": -4.783900260925293, | |
| "loss": 25.2195, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.2695242762565613, | |
| "rewards/margins": 0.05760473012924194, | |
| "rewards/rejected": -0.3271290063858032, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.10069225928256766, | |
| "grad_norm": 200.13609313964844, | |
| "learning_rate": 3.8490566037735854e-05, | |
| "logits/chosen": -1.7428325414657593, | |
| "logits/rejected": -1.74752938747406, | |
| "logps/chosen": -3.6156649589538574, | |
| "logps/rejected": -4.805546760559082, | |
| "loss": 22.7118, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.2704419791698456, | |
| "rewards/margins": 0.06444540619850159, | |
| "rewards/rejected": -0.33488741517066956, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.10383889238514789, | |
| "grad_norm": 172.51622009277344, | |
| "learning_rate": 3.9748427672955975e-05, | |
| "logits/chosen": -1.7474384307861328, | |
| "logits/rejected": -1.7428706884384155, | |
| "logps/chosen": -3.276729106903076, | |
| "logps/rejected": -4.082120418548584, | |
| "loss": 22.3077, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.2311684638261795, | |
| "rewards/margins": 0.051828593015670776, | |
| "rewards/rejected": -0.2829970717430115, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.10698552548772813, | |
| "grad_norm": 146.08352661132812, | |
| "learning_rate": 3.9999227773220194e-05, | |
| "logits/chosen": -1.6052366495132446, | |
| "logits/rejected": -1.6235520839691162, | |
| "logps/chosen": -3.030139207839966, | |
| "logps/rejected": -4.707204818725586, | |
| "loss": 20.0014, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.21569938957691193, | |
| "rewards/margins": 0.12310032546520233, | |
| "rewards/rejected": -0.33879974484443665, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.11013215859030837, | |
| "grad_norm": 133.93203735351562, | |
| "learning_rate": 3.9996090704130684e-05, | |
| "logits/chosen": -1.7021839618682861, | |
| "logits/rejected": -1.7295335531234741, | |
| "logps/chosen": -3.9147982597351074, | |
| "logps/rejected": -5.332208633422852, | |
| "loss": 20.047, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.3039882779121399, | |
| "rewards/margins": 0.1180083155632019, | |
| "rewards/rejected": -0.4219965934753418, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.11327879169288861, | |
| "grad_norm": 558.7332763671875, | |
| "learning_rate": 3.999054090678532e-05, | |
| "logits/chosen": -1.5368597507476807, | |
| "logits/rejected": -1.592637300491333, | |
| "logps/chosen": -6.026860237121582, | |
| "logps/rejected": -6.550711631774902, | |
| "loss": 29.6933, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.4695097804069519, | |
| "rewards/margins": 0.02213056944310665, | |
| "rewards/rejected": -0.4916403889656067, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.11642542479546884, | |
| "grad_norm": 212.05760192871094, | |
| "learning_rate": 3.9982579050822615e-05, | |
| "logits/chosen": -1.5933212041854858, | |
| "logits/rejected": -1.5753694772720337, | |
| "logps/chosen": -4.716382026672363, | |
| "logps/rejected": -5.257371425628662, | |
| "loss": 27.5815, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.3512773811817169, | |
| "rewards/margins": 0.033867720514535904, | |
| "rewards/rejected": -0.3851450979709625, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.11957205789804909, | |
| "grad_norm": 134.0122833251953, | |
| "learning_rate": 3.997220609692011e-05, | |
| "logits/chosen": -1.6495559215545654, | |
| "logits/rejected": -1.6725133657455444, | |
| "logps/chosen": -3.984989643096924, | |
| "logps/rejected": -5.001562595367432, | |
| "loss": 22.6766, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.2919956147670746, | |
| "rewards/margins": 0.05422482639551163, | |
| "rewards/rejected": -0.3462204337120056, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.12271869100062932, | |
| "grad_norm": 151.4617919921875, | |
| "learning_rate": 3.9959423296678384e-05, | |
| "logits/chosen": -1.7128961086273193, | |
| "logits/rejected": -1.6318174600601196, | |
| "logps/chosen": -3.3435721397399902, | |
| "logps/rejected": -4.078289985656738, | |
| "loss": 25.0994, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.23941469192504883, | |
| "rewards/margins": 0.04007013887166977, | |
| "rewards/rejected": -0.2794848084449768, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.12586532410320955, | |
| "grad_norm": 115.02042388916016, | |
| "learning_rate": 3.9944232192470094e-05, | |
| "logits/chosen": -1.7137172222137451, | |
| "logits/rejected": -1.7910420894622803, | |
| "logps/chosen": -3.106358051300049, | |
| "logps/rejected": -3.97111439704895, | |
| "loss": 21.6293, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.2061949521303177, | |
| "rewards/margins": 0.04795767739415169, | |
| "rewards/rejected": -0.2541525959968567, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1290119572057898, | |
| "grad_norm": 81.87369537353516, | |
| "learning_rate": 3.992663461725383e-05, | |
| "logits/chosen": -1.5431886911392212, | |
| "logits/rejected": -1.557018518447876, | |
| "logps/chosen": -2.805392026901245, | |
| "logps/rejected": -4.356006622314453, | |
| "loss": 21.8817, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.1901206076145172, | |
| "rewards/margins": 0.0818587988615036, | |
| "rewards/rejected": -0.2719793915748596, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.13215859030837004, | |
| "grad_norm": 188.29173278808594, | |
| "learning_rate": 3.990663269435298e-05, | |
| "logits/chosen": -1.6920125484466553, | |
| "logits/rejected": -1.6854931116104126, | |
| "logps/chosen": -3.156735897064209, | |
| "logps/rejected": -4.396471977233887, | |
| "loss": 27.5638, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.20159177482128143, | |
| "rewards/margins": 0.07304342836141586, | |
| "rewards/rejected": -0.2746351957321167, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.13530522341095028, | |
| "grad_norm": 141.69142150878906, | |
| "learning_rate": 3.98842288371995e-05, | |
| "logits/chosen": -1.6487762928009033, | |
| "logits/rejected": -1.7372210025787354, | |
| "logps/chosen": -2.6156325340270996, | |
| "logps/rejected": -3.677928924560547, | |
| "loss": 21.5613, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.1552310734987259, | |
| "rewards/margins": 0.0671745091676712, | |
| "rewards/rejected": -0.2224055826663971, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.13845185651353054, | |
| "grad_norm": 92.31954193115234, | |
| "learning_rate": 3.985942574904275e-05, | |
| "logits/chosen": -1.677199363708496, | |
| "logits/rejected": -1.6414434909820557, | |
| "logps/chosen": -2.499932050704956, | |
| "logps/rejected": -3.3010895252227783, | |
| "loss": 22.2151, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.16245323419570923, | |
| "rewards/margins": 0.05558500811457634, | |
| "rewards/rejected": -0.21803824603557587, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.14159848961611077, | |
| "grad_norm": 106.32303619384766, | |
| "learning_rate": 3.983222642262329e-05, | |
| "logits/chosen": -1.6422779560089111, | |
| "logits/rejected": -1.6500838994979858, | |
| "logps/chosen": -2.66230845451355, | |
| "logps/rejected": -3.7150185108184814, | |
| "loss": 20.2102, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.18372972309589386, | |
| "rewards/margins": 0.08325181156396866, | |
| "rewards/rejected": -0.2669815421104431, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.144745122718691, | |
| "grad_norm": 113.5155029296875, | |
| "learning_rate": 3.980263413981178e-05, | |
| "logits/chosen": -1.5669496059417725, | |
| "logits/rejected": -1.5747731924057007, | |
| "logps/chosen": -3.1706671714782715, | |
| "logps/rejected": -3.948005199432373, | |
| "loss": 21.8852, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.2336561679840088, | |
| "rewards/margins": 0.06708776950836182, | |
| "rewards/rejected": -0.300743967294693, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.14789175582127123, | |
| "grad_norm": 99.03396606445312, | |
| "learning_rate": 3.977065247121298e-05, | |
| "logits/chosen": -1.639129400253296, | |
| "logits/rejected": -1.6693006753921509, | |
| "logps/chosen": -3.2495856285095215, | |
| "logps/rejected": -4.634251594543457, | |
| "loss": 22.4292, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.23980684578418732, | |
| "rewards/margins": 0.10619230568408966, | |
| "rewards/rejected": -0.345999151468277, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.1510383889238515, | |
| "grad_norm": 254.25714111328125, | |
| "learning_rate": 3.973628527573495e-05, | |
| "logits/chosen": -1.4451357126235962, | |
| "logits/rejected": -1.415290355682373, | |
| "logps/chosen": -4.496035575866699, | |
| "logps/rejected": -5.4740800857543945, | |
| "loss": 24.0697, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.3446175158023834, | |
| "rewards/margins": 0.07305804640054703, | |
| "rewards/rejected": -0.41767558455467224, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.15418502202643172, | |
| "grad_norm": 98.8416976928711, | |
| "learning_rate": 3.969953670012342e-05, | |
| "logits/chosen": -1.6127903461456299, | |
| "logits/rejected": -1.529802918434143, | |
| "logps/chosen": -3.744677782058716, | |
| "logps/rejected": -5.76874303817749, | |
| "loss": 20.2498, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.2814852297306061, | |
| "rewards/margins": 0.12259259074926376, | |
| "rewards/rejected": -0.40407782793045044, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.15733165512901195, | |
| "grad_norm": 174.65185546875, | |
| "learning_rate": 3.9660411178461427e-05, | |
| "logits/chosen": -1.6170070171356201, | |
| "logits/rejected": -1.5994997024536133, | |
| "logps/chosen": -3.390500545501709, | |
| "logps/rejected": -4.377715587615967, | |
| "loss": 22.3596, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.2286950647830963, | |
| "rewards/margins": 0.07162559777498245, | |
| "rewards/rejected": -0.30032065510749817, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.1604782882315922, | |
| "grad_norm": 98.30694580078125, | |
| "learning_rate": 3.9618913431634326e-05, | |
| "logits/chosen": -1.5248662233352661, | |
| "logits/rejected": -1.570233702659607, | |
| "logps/chosen": -2.914156436920166, | |
| "logps/rejected": -3.4877963066101074, | |
| "loss": 21.8392, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.17873355746269226, | |
| "rewards/margins": 0.04700728505849838, | |
| "rewards/rejected": -0.22574086487293243, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.16362492133417245, | |
| "grad_norm": 108.39894104003906, | |
| "learning_rate": 3.957504846676015e-05, | |
| "logits/chosen": -1.5246005058288574, | |
| "logits/rejected": -1.6037238836288452, | |
| "logps/chosen": -3.113523006439209, | |
| "logps/rejected": -4.024534702301025, | |
| "loss": 21.9178, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.21206626296043396, | |
| "rewards/margins": 0.06214412301778793, | |
| "rewards/rejected": -0.2742103934288025, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.16677155443675268, | |
| "grad_norm": 122.53215026855469, | |
| "learning_rate": 3.952882157658545e-05, | |
| "logits/chosen": -1.4534975290298462, | |
| "logits/rejected": -1.4294064044952393, | |
| "logps/chosen": -3.44130277633667, | |
| "logps/rejected": -3.7570698261260986, | |
| "loss": 25.6929, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.2504199147224426, | |
| "rewards/margins": 0.021822316572070122, | |
| "rewards/rejected": -0.2722422182559967, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.1699181875393329, | |
| "grad_norm": 117.72502899169922, | |
| "learning_rate": 3.948023833884667e-05, | |
| "logits/chosen": -1.596609354019165, | |
| "logits/rejected": -1.6202917098999023, | |
| "logps/chosen": -3.7515816688537598, | |
| "logps/rejected": -3.9420647621154785, | |
| "loss": 25.1709, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.23414090275764465, | |
| "rewards/margins": 0.027978042140603065, | |
| "rewards/rejected": -0.26211896538734436, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.17306482064191314, | |
| "grad_norm": 84.38936614990234, | |
| "learning_rate": 3.942930461559718e-05, | |
| "logits/chosen": -1.5714600086212158, | |
| "logits/rejected": -1.683579683303833, | |
| "logps/chosen": -3.3148865699768066, | |
| "logps/rejected": -3.7648849487304688, | |
| "loss": 24.1859, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.2219853699207306, | |
| "rewards/margins": 0.02847103402018547, | |
| "rewards/rejected": -0.25045639276504517, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.1762114537444934, | |
| "grad_norm": 122.990478515625, | |
| "learning_rate": 3.9376026552499894e-05, | |
| "logits/chosen": -1.5986852645874023, | |
| "logits/rejected": -1.6811764240264893, | |
| "logps/chosen": -3.261617660522461, | |
| "logps/rejected": -4.3173418045043945, | |
| "loss": 19.8872, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.22893181443214417, | |
| "rewards/margins": 0.0762997642159462, | |
| "rewards/rejected": -0.30523157119750977, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.17935808684707363, | |
| "grad_norm": 128.1126251220703, | |
| "learning_rate": 3.9320410578085774e-05, | |
| "logits/chosen": -1.5240422487258911, | |
| "logits/rejected": -1.5410079956054688, | |
| "logps/chosen": -3.7498767375946045, | |
| "logps/rejected": -4.466190338134766, | |
| "loss": 22.8035, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.2702712118625641, | |
| "rewards/margins": 0.0467303991317749, | |
| "rewards/rejected": -0.3170016407966614, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.18250471994965387, | |
| "grad_norm": 160.00189208984375, | |
| "learning_rate": 3.9262463402978165e-05, | |
| "logits/chosen": -1.413119912147522, | |
| "logits/rejected": -1.3633155822753906, | |
| "logps/chosen": -3.8721237182617188, | |
| "logps/rejected": -5.0125298500061035, | |
| "loss": 22.2056, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.3019005358219147, | |
| "rewards/margins": 0.08287017047405243, | |
| "rewards/rejected": -0.3847707211971283, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1856513530522341, | |
| "grad_norm": 168.05908203125, | |
| "learning_rate": 3.920219201908306e-05, | |
| "logits/chosen": -1.2270746231079102, | |
| "logits/rejected": -1.2809008359909058, | |
| "logps/chosen": -4.052460670471191, | |
| "logps/rejected": -5.228961944580078, | |
| "loss": 21.1427, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.3124389052391052, | |
| "rewards/margins": 0.0833948403596878, | |
| "rewards/rejected": -0.3958337903022766, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.18879798615481436, | |
| "grad_norm": 94.47506713867188, | |
| "learning_rate": 3.9139603698745514e-05, | |
| "logits/chosen": -1.1681110858917236, | |
| "logits/rejected": -1.2372829914093018, | |
| "logps/chosen": -3.511944532394409, | |
| "logps/rejected": -4.100220680236816, | |
| "loss": 22.7025, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.24873590469360352, | |
| "rewards/margins": 0.03639525547623634, | |
| "rewards/rejected": -0.28513115644454956, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.1919446192573946, | |
| "grad_norm": 560.2835083007812, | |
| "learning_rate": 3.907470599387209e-05, | |
| "logits/chosen": -1.101466178894043, | |
| "logits/rejected": -1.0982881784439087, | |
| "logps/chosen": -3.0287392139434814, | |
| "logps/rejected": -3.3412985801696777, | |
| "loss": 25.2732, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.210398867726326, | |
| "rewards/margins": 0.023965148255228996, | |
| "rewards/rejected": -0.23436403274536133, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.19509125235997482, | |
| "grad_norm": 190.03529357910156, | |
| "learning_rate": 3.900750673501971e-05, | |
| "logits/chosen": -0.8078586459159851, | |
| "logits/rejected": -1.0514795780181885, | |
| "logps/chosen": -2.391371250152588, | |
| "logps/rejected": -3.401437282562256, | |
| "loss": 21.6721, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.14814777672290802, | |
| "rewards/margins": 0.07396493852138519, | |
| "rewards/rejected": -0.22211270034313202, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.19823788546255505, | |
| "grad_norm": 127.30278778076172, | |
| "learning_rate": 3.893801403045078e-05, | |
| "logits/chosen": -0.9948938488960266, | |
| "logits/rejected": -1.1343729496002197, | |
| "logps/chosen": -2.520848274230957, | |
| "logps/rejected": -3.695737838745117, | |
| "loss": 21.1395, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.16981211304664612, | |
| "rewards/margins": 0.08695949614048004, | |
| "rewards/rejected": -0.25677159428596497, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.2013845185651353, | |
| "grad_norm": 164.39279174804688, | |
| "learning_rate": 3.8866236265154864e-05, | |
| "logits/chosen": -1.059020757675171, | |
| "logits/rejected": -1.1909369230270386, | |
| "logps/chosen": -3.2958297729492188, | |
| "logps/rejected": -4.60178279876709, | |
| "loss": 23.1263, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.24349625408649445, | |
| "rewards/margins": 0.08941353857517242, | |
| "rewards/rejected": -0.33290979266166687, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.20453115166771554, | |
| "grad_norm": 317.5319519042969, | |
| "learning_rate": 3.8792182099836956e-05, | |
| "logits/chosen": -1.1690977811813354, | |
| "logits/rejected": -1.221868872642517, | |
| "logps/chosen": -3.4916579723358154, | |
| "logps/rejected": -4.967286109924316, | |
| "loss": 19.5685, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.2618243992328644, | |
| "rewards/margins": 0.09256020933389664, | |
| "rewards/rejected": -0.35438457131385803, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.20767778477029578, | |
| "grad_norm": 113.65757751464844, | |
| "learning_rate": 3.8715860469872456e-05, | |
| "logits/chosen": -1.230567216873169, | |
| "logits/rejected": -1.2354533672332764, | |
| "logps/chosen": -4.1219682693481445, | |
| "logps/rejected": -5.140664577484131, | |
| "loss": 24.1262, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3101332485675812, | |
| "rewards/margins": 0.07826542854309082, | |
| "rewards/rejected": -0.3883987069129944, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.21082441787287604, | |
| "grad_norm": 103.66908264160156, | |
| "learning_rate": 3.863728058422905e-05, | |
| "logits/chosen": -1.1679656505584717, | |
| "logits/rejected": -1.2492824792861938, | |
| "logps/chosen": -4.176590442657471, | |
| "logps/rejected": -5.121442794799805, | |
| "loss": 21.9799, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.3107621371746063, | |
| "rewards/margins": 0.07555123418569565, | |
| "rewards/rejected": -0.38631340861320496, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.21397105097545627, | |
| "grad_norm": 187.34596252441406, | |
| "learning_rate": 3.855645192435555e-05, | |
| "logits/chosen": -1.4208840131759644, | |
| "logits/rejected": -1.357755422592163, | |
| "logps/chosen": -3.746802568435669, | |
| "logps/rejected": -4.651678562164307, | |
| "loss": 21.8739, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.2814106345176697, | |
| "rewards/margins": 0.06742358207702637, | |
| "rewards/rejected": -0.34883421659469604, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.2171176840780365, | |
| "grad_norm": 128.47970581054688, | |
| "learning_rate": 3.847338424303787e-05, | |
| "logits/chosen": -1.403939962387085, | |
| "logits/rejected": -1.3926942348480225, | |
| "logps/chosen": -3.540362596511841, | |
| "logps/rejected": -4.463648796081543, | |
| "loss": 22.9837, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.2591942250728607, | |
| "rewards/margins": 0.06667342782020569, | |
| "rewards/rejected": -0.3258676528930664, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.22026431718061673, | |
| "grad_norm": 91.00343322753906, | |
| "learning_rate": 3.838808756322222e-05, | |
| "logits/chosen": -1.4555909633636475, | |
| "logits/rejected": -1.4179480075836182, | |
| "logps/chosen": -3.3319029808044434, | |
| "logps/rejected": -4.7188615798950195, | |
| "loss": 22.182, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.24019880592823029, | |
| "rewards/margins": 0.09150617569684982, | |
| "rewards/rejected": -0.3317049741744995, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.223410950283197, | |
| "grad_norm": 89.21013641357422, | |
| "learning_rate": 3.8300572176805796e-05, | |
| "logits/chosen": -1.505953073501587, | |
| "logits/rejected": -1.4713289737701416, | |
| "logps/chosen": -3.2633144855499268, | |
| "logps/rejected": -4.148341655731201, | |
| "loss": 22.4622, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.23655016720294952, | |
| "rewards/margins": 0.04711543396115303, | |
| "rewards/rejected": -0.28366559743881226, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.22655758338577722, | |
| "grad_norm": 136.71780395507812, | |
| "learning_rate": 3.82108486433949e-05, | |
| "logits/chosen": -1.4959208965301514, | |
| "logits/rejected": -1.4308115243911743, | |
| "logps/chosen": -3.161681652069092, | |
| "logps/rejected": -3.9897868633270264, | |
| "loss": 23.3097, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.2291373759508133, | |
| "rewards/margins": 0.045841820538043976, | |
| "rewards/rejected": -0.2749791741371155, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.22970421648835745, | |
| "grad_norm": 233.93896484375, | |
| "learning_rate": 3.8118927789030854e-05, | |
| "logits/chosen": -1.5138304233551025, | |
| "logits/rejected": -1.5346544981002808, | |
| "logps/chosen": -4.37386417388916, | |
| "logps/rejected": -5.469226837158203, | |
| "loss": 20.9319, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.3008665442466736, | |
| "rewards/margins": 0.07115120440721512, | |
| "rewards/rejected": -0.3720177412033081, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.2328508495909377, | |
| "grad_norm": 100.57418060302734, | |
| "learning_rate": 3.802482070488373e-05, | |
| "logits/chosen": -1.3890790939331055, | |
| "logits/rejected": -1.4434179067611694, | |
| "logps/chosen": -3.4095160961151123, | |
| "logps/rejected": -4.254734039306641, | |
| "loss": 21.2175, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.24669814109802246, | |
| "rewards/margins": 0.06480761617422104, | |
| "rewards/rejected": -0.3115057349205017, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.23599748269351795, | |
| "grad_norm": 194.1370391845703, | |
| "learning_rate": 3.792853874591408e-05, | |
| "logits/chosen": -1.5562362670898438, | |
| "logits/rejected": -1.4487522840499878, | |
| "logps/chosen": -3.45831561088562, | |
| "logps/rejected": -4.16960334777832, | |
| "loss": 24.8363, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.23216786980628967, | |
| "rewards/margins": 0.047336287796497345, | |
| "rewards/rejected": -0.2795041799545288, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.23914411579609818, | |
| "grad_norm": 88.31356811523438, | |
| "learning_rate": 3.783009352950282e-05, | |
| "logits/chosen": -1.371385097503662, | |
| "logits/rejected": -1.373175859451294, | |
| "logps/chosen": -2.55993390083313, | |
| "logps/rejected": -3.111349105834961, | |
| "loss": 22.3814, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.17149809002876282, | |
| "rewards/margins": 0.04337615519762039, | |
| "rewards/rejected": -0.214874267578125, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.2422907488986784, | |
| "grad_norm": 126.74950408935547, | |
| "learning_rate": 3.772949693404954e-05, | |
| "logits/chosen": -1.33748459815979, | |
| "logits/rejected": -1.3754979372024536, | |
| "logps/chosen": -2.633439064025879, | |
| "logps/rejected": -3.534024715423584, | |
| "loss": 20.4661, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.17298361659049988, | |
| "rewards/margins": 0.07067564874887466, | |
| "rewards/rejected": -0.24365928769111633, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.24543738200125864, | |
| "grad_norm": 90.40318298339844, | |
| "learning_rate": 3.762676109753919e-05, | |
| "logits/chosen": -1.2709859609603882, | |
| "logits/rejected": -1.294306755065918, | |
| "logps/chosen": -3.954099655151367, | |
| "logps/rejected": -5.9721527099609375, | |
| "loss": 21.932, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.29533275961875916, | |
| "rewards/margins": 0.12940457463264465, | |
| "rewards/rejected": -0.4247373640537262, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2485840151038389, | |
| "grad_norm": 84.59414672851562, | |
| "learning_rate": 3.7521898416077565e-05, | |
| "logits/chosen": -1.4984506368637085, | |
| "logits/rejected": -1.5229644775390625, | |
| "logps/chosen": -4.4091901779174805, | |
| "logps/rejected": -5.3940815925598145, | |
| "loss": 21.5859, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.3109613358974457, | |
| "rewards/margins": 0.08055521547794342, | |
| "rewards/rejected": -0.3915165364742279, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.2517306482064191, | |
| "grad_norm": 120.28202056884766, | |
| "learning_rate": 3.7414921542395546e-05, | |
| "logits/chosen": -1.5182693004608154, | |
| "logits/rejected": -1.5193490982055664, | |
| "logps/chosen": -4.545083045959473, | |
| "logps/rejected": -5.492725372314453, | |
| "loss": 21.539, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.36579376459121704, | |
| "rewards/margins": 0.06641928851604462, | |
| "rewards/rejected": -0.4322130084037781, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2548772813089994, | |
| "grad_norm": 143.28396606445312, | |
| "learning_rate": 3.7305843384322466e-05, | |
| "logits/chosen": -1.5114035606384277, | |
| "logits/rejected": -1.5092270374298096, | |
| "logps/chosen": -5.28603982925415, | |
| "logps/rejected": -6.232533931732178, | |
| "loss": 21.4891, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.43935927748680115, | |
| "rewards/margins": 0.08039890229701996, | |
| "rewards/rejected": -0.5197581648826599, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.2580239144115796, | |
| "grad_norm": 129.09864807128906, | |
| "learning_rate": 3.71946771032286e-05, | |
| "logits/chosen": -1.6940416097640991, | |
| "logits/rejected": -1.6389005184173584, | |
| "logps/chosen": -5.122313022613525, | |
| "logps/rejected": -6.010600566864014, | |
| "loss": 21.8681, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.42212480306625366, | |
| "rewards/margins": 0.076592817902565, | |
| "rewards/rejected": -0.49871763586997986, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.26117054751415986, | |
| "grad_norm": 1118.02392578125, | |
| "learning_rate": 3.708143611243716e-05, | |
| "logits/chosen": -1.65127432346344, | |
| "logits/rejected": -1.6758639812469482, | |
| "logps/chosen": -5.203777313232422, | |
| "logps/rejected": -6.3162078857421875, | |
| "loss": 21.2512, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.37822961807250977, | |
| "rewards/margins": 0.09629149734973907, | |
| "rewards/rejected": -0.4745211601257324, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.2643171806167401, | |
| "grad_norm": 109.98821258544922, | |
| "learning_rate": 3.696613407560582e-05, | |
| "logits/chosen": -1.6237115859985352, | |
| "logits/rejected": -1.5712984800338745, | |
| "logps/chosen": -4.632975101470947, | |
| "logps/rejected": -6.082078456878662, | |
| "loss": 20.9477, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.3740273118019104, | |
| "rewards/margins": 0.103847935795784, | |
| "rewards/rejected": -0.4778752326965332, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.2674638137193203, | |
| "grad_norm": 95.2988052368164, | |
| "learning_rate": 3.684878490507808e-05, | |
| "logits/chosen": -1.5806386470794678, | |
| "logits/rejected": -1.6192169189453125, | |
| "logps/chosen": -4.849827766418457, | |
| "logps/rejected": -5.800168037414551, | |
| "loss": 23.5806, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3749791085720062, | |
| "rewards/margins": 0.07270670682191849, | |
| "rewards/rejected": -0.4476858079433441, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.27061044682190055, | |
| "grad_norm": 111.99176788330078, | |
| "learning_rate": 3.6729402760204535e-05, | |
| "logits/chosen": -1.6522388458251953, | |
| "logits/rejected": -1.6433773040771484, | |
| "logps/chosen": -3.4129672050476074, | |
| "logps/rejected": -4.362156867980957, | |
| "loss": 21.9253, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.25616371631622314, | |
| "rewards/margins": 0.07649270445108414, | |
| "rewards/rejected": -0.3326564431190491, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.2737570799244808, | |
| "grad_norm": 219.88124084472656, | |
| "learning_rate": 3.6608002045634535e-05, | |
| "logits/chosen": -1.7825971841812134, | |
| "logits/rejected": -1.6959110498428345, | |
| "logps/chosen": -3.785250425338745, | |
| "logps/rejected": -4.989777565002441, | |
| "loss": 22.1928, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.26640018820762634, | |
| "rewards/margins": 0.07046084105968475, | |
| "rewards/rejected": -0.3368610143661499, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.27690371302706107, | |
| "grad_norm": 110.93528747558594, | |
| "learning_rate": 3.6484597409577975e-05, | |
| "logits/chosen": -1.8389028310775757, | |
| "logits/rejected": -1.7533693313598633, | |
| "logps/chosen": -3.4091110229492188, | |
| "logps/rejected": -4.324118614196777, | |
| "loss": 21.2394, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.25699272751808167, | |
| "rewards/margins": 0.06507135927677155, | |
| "rewards/rejected": -0.322064071893692, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.2800503461296413, | |
| "grad_norm": 128.312255859375, | |
| "learning_rate": 3.6359203742037966e-05, | |
| "logits/chosen": -1.8402115106582642, | |
| "logits/rejected": -1.7344493865966797, | |
| "logps/chosen": -4.041749000549316, | |
| "logps/rejected": -4.417330741882324, | |
| "loss": 22.7853, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.2921445965766907, | |
| "rewards/margins": 0.04909106716513634, | |
| "rewards/rejected": -0.3412356376647949, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.28319697923222154, | |
| "grad_norm": 121.12706756591797, | |
| "learning_rate": 3.623183617301411e-05, | |
| "logits/chosen": -1.7311460971832275, | |
| "logits/rejected": -1.7096502780914307, | |
| "logps/chosen": -3.8819706439971924, | |
| "logps/rejected": -4.670237064361572, | |
| "loss": 22.6275, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.30139902234077454, | |
| "rewards/margins": 0.05851779133081436, | |
| "rewards/rejected": -0.3599168360233307, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.28634361233480177, | |
| "grad_norm": 93.03048706054688, | |
| "learning_rate": 3.610251007067699e-05, | |
| "logits/chosen": -1.836363434791565, | |
| "logits/rejected": -1.736104965209961, | |
| "logps/chosen": -4.1447577476501465, | |
| "logps/rejected": -4.325010299682617, | |
| "loss": 26.2728, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.32724231481552124, | |
| "rewards/margins": 0.010385597124695778, | |
| "rewards/rejected": -0.33762794733047485, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.289490245437382, | |
| "grad_norm": 76.58390808105469, | |
| "learning_rate": 3.597124103951379e-05, | |
| "logits/chosen": -1.7278220653533936, | |
| "logits/rejected": -1.7181174755096436, | |
| "logps/chosen": -4.0262017250061035, | |
| "logps/rejected": -4.855641841888428, | |
| "loss": 22.3804, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.2886626124382019, | |
| "rewards/margins": 0.06016182899475098, | |
| "rewards/rejected": -0.3488244414329529, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.29263687853996223, | |
| "grad_norm": 80.33660888671875, | |
| "learning_rate": 3.583804491844551e-05, | |
| "logits/chosen": -1.8658571243286133, | |
| "logits/rejected": -1.7413606643676758, | |
| "logps/chosen": -3.758129835128784, | |
| "logps/rejected": -4.306906223297119, | |
| "loss": 26.088, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.2766272723674774, | |
| "rewards/margins": 0.03810672461986542, | |
| "rewards/rejected": -0.31473398208618164, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.29578351164254246, | |
| "grad_norm": 66.17215728759766, | |
| "learning_rate": 3.5702937778915765e-05, | |
| "logits/chosen": -1.8694692850112915, | |
| "logits/rejected": -1.82939875125885, | |
| "logps/chosen": -2.9322712421417236, | |
| "logps/rejected": -3.7157013416290283, | |
| "loss": 21.7852, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.2061152458190918, | |
| "rewards/margins": 0.056372471153736115, | |
| "rewards/rejected": -0.2624877095222473, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.2989301447451227, | |
| "grad_norm": 95.2267837524414, | |
| "learning_rate": 3.556593592295171e-05, | |
| "logits/chosen": -1.8632274866104126, | |
| "logits/rejected": -1.8683363199234009, | |
| "logps/chosen": -2.8304595947265625, | |
| "logps/rejected": -3.464296817779541, | |
| "loss": 22.1458, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.19707690179347992, | |
| "rewards/margins": 0.04870922490954399, | |
| "rewards/rejected": -0.24578611552715302, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.302076777847703, | |
| "grad_norm": 128.1005096435547, | |
| "learning_rate": 3.5427055881196946e-05, | |
| "logits/chosen": -1.7504918575286865, | |
| "logits/rejected": -1.8846075534820557, | |
| "logps/chosen": -2.7551674842834473, | |
| "logps/rejected": -3.501314163208008, | |
| "loss": 21.4037, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.192325159907341, | |
| "rewards/margins": 0.05459358170628548, | |
| "rewards/rejected": -0.2469187080860138, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.3052234109502832, | |
| "grad_norm": 64.81920623779297, | |
| "learning_rate": 3.5286314410916967e-05, | |
| "logits/chosen": -1.8015562295913696, | |
| "logits/rejected": -1.9157085418701172, | |
| "logps/chosen": -3.297150135040283, | |
| "logps/rejected": -4.347265243530273, | |
| "loss": 20.2599, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.24196556210517883, | |
| "rewards/margins": 0.06687469035387039, | |
| "rewards/rejected": -0.30884024500846863, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.30837004405286345, | |
| "grad_norm": 121.4966812133789, | |
| "learning_rate": 3.5143728493977245e-05, | |
| "logits/chosen": -1.7404873371124268, | |
| "logits/rejected": -1.8498218059539795, | |
| "logps/chosen": -3.553678035736084, | |
| "logps/rejected": -4.084536075592041, | |
| "loss": 24.4702, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.2725631594657898, | |
| "rewards/margins": 0.037132084369659424, | |
| "rewards/rejected": -0.3096952736377716, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.3115166771554437, | |
| "grad_norm": 102.46180725097656, | |
| "learning_rate": 3.499931533479417e-05, | |
| "logits/chosen": -1.7682313919067383, | |
| "logits/rejected": -1.7660820484161377, | |
| "logps/chosen": -3.595475435256958, | |
| "logps/rejected": -4.801576137542725, | |
| "loss": 20.9722, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.2746976315975189, | |
| "rewards/margins": 0.10004003345966339, | |
| "rewards/rejected": -0.3747376501560211, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.3146633102580239, | |
| "grad_norm": 100.82923889160156, | |
| "learning_rate": 3.485309235825916e-05, | |
| "logits/chosen": -1.7638380527496338, | |
| "logits/rejected": -1.857962965965271, | |
| "logps/chosen": -4.1785569190979, | |
| "logps/rejected": -5.445678234100342, | |
| "loss": 20.121, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.30823373794555664, | |
| "rewards/margins": 0.09736496210098267, | |
| "rewards/rejected": -0.4055987298488617, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.31780994336060414, | |
| "grad_norm": 299.635009765625, | |
| "learning_rate": 3.470507720763625e-05, | |
| "logits/chosen": -1.7603092193603516, | |
| "logits/rejected": -1.8294856548309326, | |
| "logps/chosen": -3.818953037261963, | |
| "logps/rejected": -4.965951442718506, | |
| "loss": 24.0421, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.2867090702056885, | |
| "rewards/margins": 0.09908684343099594, | |
| "rewards/rejected": -0.385795921087265, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.3209565764631844, | |
| "grad_norm": 121.77188110351562, | |
| "learning_rate": 3.4555287742433115e-05, | |
| "logits/chosen": -1.8968608379364014, | |
| "logits/rejected": -1.863628625869751, | |
| "logps/chosen": -3.3851046562194824, | |
| "logps/rejected": -4.313992500305176, | |
| "loss": 21.5651, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.2504531145095825, | |
| "rewards/margins": 0.07505444437265396, | |
| "rewards/rejected": -0.3255075514316559, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.3241032095657646, | |
| "grad_norm": 84.7723617553711, | |
| "learning_rate": 3.440374203624628e-05, | |
| "logits/chosen": -1.8949018716812134, | |
| "logits/rejected": -2.03389573097229, | |
| "logps/chosen": -3.739046573638916, | |
| "logps/rejected": -4.937285423278809, | |
| "loss": 22.0895, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.2827950417995453, | |
| "rewards/margins": 0.07987246662378311, | |
| "rewards/rejected": -0.3626675605773926, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.3272498426683449, | |
| "grad_norm": 96.02967071533203, | |
| "learning_rate": 3.425045837458028e-05, | |
| "logits/chosen": -1.9336235523223877, | |
| "logits/rejected": -1.9811556339263916, | |
| "logps/chosen": -3.5748794078826904, | |
| "logps/rejected": -4.64247465133667, | |
| "loss": 20.7454, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.2698992192745209, | |
| "rewards/margins": 0.07278282940387726, | |
| "rewards/rejected": -0.3426820635795593, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.3303964757709251, | |
| "grad_norm": 138.71051025390625, | |
| "learning_rate": 3.4095455252641376e-05, | |
| "logits/chosen": -1.938104271888733, | |
| "logits/rejected": -2.024137020111084, | |
| "logps/chosen": -4.332060813903809, | |
| "logps/rejected": -5.391437530517578, | |
| "loss": 23.3511, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.3168641924858093, | |
| "rewards/margins": 0.049729883670806885, | |
| "rewards/rejected": -0.3665940761566162, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.33354310887350536, | |
| "grad_norm": 93.8726577758789, | |
| "learning_rate": 3.393875137310588e-05, | |
| "logits/chosen": -1.8752260208129883, | |
| "logits/rejected": -1.8945411443710327, | |
| "logps/chosen": -4.053868770599365, | |
| "logps/rejected": -5.044325828552246, | |
| "loss": 21.8528, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.3227534890174866, | |
| "rewards/margins": 0.0821223258972168, | |
| "rewards/rejected": -0.4048757553100586, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.3366897419760856, | |
| "grad_norm": 261.39129638671875, | |
| "learning_rate": 3.378036564386349e-05, | |
| "logits/chosen": -1.770957589149475, | |
| "logits/rejected": -1.8808790445327759, | |
| "logps/chosen": -3.8808326721191406, | |
| "logps/rejected": -4.960693836212158, | |
| "loss": 23.7267, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3041539788246155, | |
| "rewards/margins": 0.08733677119016647, | |
| "rewards/rejected": -0.39149072766304016, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.3398363750786658, | |
| "grad_norm": 141.79991149902344, | |
| "learning_rate": 3.3620317175735945e-05, | |
| "logits/chosen": -1.929517149925232, | |
| "logits/rejected": -1.8599262237548828, | |
| "logps/chosen": -4.427219867706299, | |
| "logps/rejected": -5.757664680480957, | |
| "loss": 20.8591, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3481447994709015, | |
| "rewards/margins": 0.0858476310968399, | |
| "rewards/rejected": -0.4339924454689026, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.34298300818124605, | |
| "grad_norm": 76.495361328125, | |
| "learning_rate": 3.345862528017101e-05, | |
| "logits/chosen": -1.8648240566253662, | |
| "logits/rejected": -1.899430513381958, | |
| "logps/chosen": -4.430551528930664, | |
| "logps/rejected": -5.134209156036377, | |
| "loss": 21.6823, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.3582889139652252, | |
| "rewards/margins": 0.05610053986310959, | |
| "rewards/rejected": -0.4143894612789154, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.3461296412838263, | |
| "grad_norm": 65.95896911621094, | |
| "learning_rate": 3.32953094669124e-05, | |
| "logits/chosen": -1.6951459646224976, | |
| "logits/rejected": -1.7398831844329834, | |
| "logps/chosen": -5.35291051864624, | |
| "logps/rejected": -6.347973823547363, | |
| "loss": 24.8551, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.4343182146549225, | |
| "rewards/margins": 0.085027314722538, | |
| "rewards/rejected": -0.5193454623222351, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.34927627438640657, | |
| "grad_norm": 64.50738525390625, | |
| "learning_rate": 3.313038944164577e-05, | |
| "logits/chosen": -1.7779582738876343, | |
| "logits/rejected": -1.8077032566070557, | |
| "logps/chosen": -4.008457183837891, | |
| "logps/rejected": -5.838412761688232, | |
| "loss": 19.2472, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.3185553550720215, | |
| "rewards/margins": 0.10776933282613754, | |
| "rewards/rejected": -0.4263246953487396, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.3524229074889868, | |
| "grad_norm": 62.579227447509766, | |
| "learning_rate": 3.296388510362095e-05, | |
| "logits/chosen": -1.5932537317276, | |
| "logits/rejected": -1.7019790410995483, | |
| "logps/chosen": -4.049741268157959, | |
| "logps/rejected": -4.859818935394287, | |
| "loss": 21.4107, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.29325228929519653, | |
| "rewards/margins": 0.06688085943460464, | |
| "rewards/rejected": -0.36013317108154297, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.35556954059156703, | |
| "grad_norm": 105.9216079711914, | |
| "learning_rate": 3.2795816543250977e-05, | |
| "logits/chosen": -1.5411794185638428, | |
| "logits/rejected": -1.5789968967437744, | |
| "logps/chosen": -3.8824076652526855, | |
| "logps/rejected": -4.560225486755371, | |
| "loss": 23.1195, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.2929798662662506, | |
| "rewards/margins": 0.05188722163438797, | |
| "rewards/rejected": -0.34486711025238037, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.35871617369414727, | |
| "grad_norm": 55.46923065185547, | |
| "learning_rate": 3.262620403968792e-05, | |
| "logits/chosen": -1.5855820178985596, | |
| "logits/rejected": -1.7370961904525757, | |
| "logps/chosen": -3.6918272972106934, | |
| "logps/rejected": -5.205948352813721, | |
| "loss": 19.1367, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.27848196029663086, | |
| "rewards/margins": 0.11322972923517227, | |
| "rewards/rejected": -0.3917117416858673, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.3618628067967275, | |
| "grad_norm": 114.82603454589844, | |
| "learning_rate": 3.245506805837605e-05, | |
| "logits/chosen": -1.6395822763442993, | |
| "logits/rejected": -1.8543764352798462, | |
| "logps/chosen": -4.298351287841797, | |
| "logps/rejected": -5.546226501464844, | |
| "loss": 19.9406, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.30993199348449707, | |
| "rewards/margins": 0.08511951565742493, | |
| "rewards/rejected": -0.3950514793395996, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.36500943989930773, | |
| "grad_norm": 174.55496215820312, | |
| "learning_rate": 3.228242924858248e-05, | |
| "logits/chosen": -1.5872471332550049, | |
| "logits/rejected": -1.688132882118225, | |
| "logps/chosen": -4.568819999694824, | |
| "logps/rejected": -5.411607265472412, | |
| "loss": 22.4314, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.34597450494766235, | |
| "rewards/margins": 0.07728902995586395, | |
| "rewards/rejected": -0.4232635498046875, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.36815607300188796, | |
| "grad_norm": 70.5542221069336, | |
| "learning_rate": 3.210830844090555e-05, | |
| "logits/chosen": -1.6192104816436768, | |
| "logits/rejected": -1.6785539388656616, | |
| "logps/chosen": -5.1252007484436035, | |
| "logps/rejected": -5.851187705993652, | |
| "loss": 25.8619, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.40460100769996643, | |
| "rewards/margins": 0.06072293594479561, | |
| "rewards/rejected": -0.46532392501831055, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.3713027061044682, | |
| "grad_norm": 100.62268829345703, | |
| "learning_rate": 3.193272664476152e-05, | |
| "logits/chosen": -1.7602649927139282, | |
| "logits/rejected": -1.9346716403961182, | |
| "logps/chosen": -4.961272239685059, | |
| "logps/rejected": -5.8130645751953125, | |
| "loss": 22.8852, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3982604444026947, | |
| "rewards/margins": 0.059664536267519, | |
| "rewards/rejected": -0.457925021648407, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.3744493392070485, | |
| "grad_norm": 411.0801696777344, | |
| "learning_rate": 3.1755705045849465e-05, | |
| "logits/chosen": -1.7633399963378906, | |
| "logits/rejected": -1.818737268447876, | |
| "logps/chosen": -5.510100364685059, | |
| "logps/rejected": -6.382575035095215, | |
| "loss": 23.8471, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.4236491620540619, | |
| "rewards/margins": 0.06903719902038574, | |
| "rewards/rejected": -0.49268636107444763, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.3775959723096287, | |
| "grad_norm": 98.035888671875, | |
| "learning_rate": 3.157726500359509e-05, | |
| "logits/chosen": -1.825554609298706, | |
| "logits/rejected": -1.907472014427185, | |
| "logps/chosen": -5.569567680358887, | |
| "logps/rejected": -6.1025004386901855, | |
| "loss": 24.087, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.4460601210594177, | |
| "rewards/margins": 0.03472483158111572, | |
| "rewards/rejected": -0.48078498244285583, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.38074260541220895, | |
| "grad_norm": 80.47187805175781, | |
| "learning_rate": 3.1397428048573465e-05, | |
| "logits/chosen": -1.798015832901001, | |
| "logits/rejected": -1.9216489791870117, | |
| "logps/chosen": -4.644695281982422, | |
| "logps/rejected": -5.7896294593811035, | |
| "loss": 19.835, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.380901575088501, | |
| "rewards/margins": 0.08407244086265564, | |
| "rewards/rejected": -0.4649740159511566, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.3838892385147892, | |
| "grad_norm": 65.88395690917969, | |
| "learning_rate": 3.121621587991113e-05, | |
| "logits/chosen": -1.9489303827285767, | |
| "logits/rejected": -1.9782030582427979, | |
| "logps/chosen": -4.736275672912598, | |
| "logps/rejected": -5.893181800842285, | |
| "loss": 21.2523, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.37491172552108765, | |
| "rewards/margins": 0.09068160504102707, | |
| "rewards/rejected": -0.46559327840805054, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.3870358716173694, | |
| "grad_norm": 126.57975769042969, | |
| "learning_rate": 3.1033650362667935e-05, | |
| "logits/chosen": -1.945927619934082, | |
| "logits/rejected": -2.0246009826660156, | |
| "logps/chosen": -4.42104434967041, | |
| "logps/rejected": -5.623631000518799, | |
| "loss": 20.477, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.3376965820789337, | |
| "rewards/margins": 0.07996558398008347, | |
| "rewards/rejected": -0.41766220331192017, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.39018250471994964, | |
| "grad_norm": 88.92438507080078, | |
| "learning_rate": 3.084975352519874e-05, | |
| "logits/chosen": -2.063378095626831, | |
| "logits/rejected": -2.161208391189575, | |
| "logps/chosen": -4.2682085037231445, | |
| "logps/rejected": -5.291066646575928, | |
| "loss": 22.2295, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.3386593759059906, | |
| "rewards/margins": 0.07158732414245605, | |
| "rewards/rejected": -0.41024675965309143, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.3933291378225299, | |
| "grad_norm": 53.47737503051758, | |
| "learning_rate": 3.06645475564955e-05, | |
| "logits/chosen": -1.9409205913543701, | |
| "logits/rejected": -2.0371243953704834, | |
| "logps/chosen": -3.6241352558135986, | |
| "logps/rejected": -5.033164978027344, | |
| "loss": 20.5698, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.27396219968795776, | |
| "rewards/margins": 0.09085332602262497, | |
| "rewards/rejected": -0.36481553316116333, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.3964757709251101, | |
| "grad_norm": 87.2447738647461, | |
| "learning_rate": 3.0478054803509975e-05, | |
| "logits/chosen": -1.9413238763809204, | |
| "logits/rejected": -1.989638328552246, | |
| "logps/chosen": -3.974926710128784, | |
| "logps/rejected": -5.115756034851074, | |
| "loss": 20.8679, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.3056657314300537, | |
| "rewards/margins": 0.09486590325832367, | |
| "rewards/rejected": -0.4005316197872162, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.3996224040276904, | |
| "grad_norm": 105.37754821777344, | |
| "learning_rate": 3.029029776845726e-05, | |
| "logits/chosen": -1.9769777059555054, | |
| "logits/rejected": -2.0631349086761475, | |
| "logps/chosen": -4.811491012573242, | |
| "logps/rejected": -6.024916648864746, | |
| "loss": 22.3949, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.36858421564102173, | |
| "rewards/margins": 0.09452919661998749, | |
| "rewards/rejected": -0.463113397359848, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.4027690371302706, | |
| "grad_norm": 107.63380432128906, | |
| "learning_rate": 3.0101299106100766e-05, | |
| "logits/chosen": -1.9259755611419678, | |
| "logits/rejected": -2.0011420249938965, | |
| "logps/chosen": -4.672276496887207, | |
| "logps/rejected": -5.433979034423828, | |
| "loss": 23.4548, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.34038934111595154, | |
| "rewards/margins": 0.05264373868703842, | |
| "rewards/rejected": -0.39303308725357056, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.40591567023285086, | |
| "grad_norm": 72.93191528320312, | |
| "learning_rate": 2.991108162101862e-05, | |
| "logits/chosen": -1.8639154434204102, | |
| "logits/rejected": -2.00860333442688, | |
| "logps/chosen": -4.0379438400268555, | |
| "logps/rejected": -4.966481685638428, | |
| "loss": 24.2063, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.3016512095928192, | |
| "rewards/margins": 0.05989114195108414, | |
| "rewards/rejected": -0.36154234409332275, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.4090623033354311, | |
| "grad_norm": 241.30491638183594, | |
| "learning_rate": 2.971966826485212e-05, | |
| "logits/chosen": -2.0276923179626465, | |
| "logits/rejected": -2.075092077255249, | |
| "logps/chosen": -3.9584078788757324, | |
| "logps/rejected": -4.5398454666137695, | |
| "loss": 22.3358, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.2686071991920471, | |
| "rewards/margins": 0.05414595082402229, | |
| "rewards/rejected": -0.3227531313896179, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.4122089364380113, | |
| "grad_norm": 72.65229797363281, | |
| "learning_rate": 2.952708213353636e-05, | |
| "logits/chosen": -2.087306499481201, | |
| "logits/rejected": -2.120595932006836, | |
| "logps/chosen": -2.7464280128479004, | |
| "logps/rejected": -3.2665913105010986, | |
| "loss": 23.396, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.19495923817157745, | |
| "rewards/margins": 0.03470323234796524, | |
| "rewards/rejected": -0.2296624630689621, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.41535556954059155, | |
| "grad_norm": 36.565982818603516, | |
| "learning_rate": 2.9333346464513476e-05, | |
| "logits/chosen": -2.0568580627441406, | |
| "logits/rejected": -2.171510934829712, | |
| "logps/chosen": -3.1527762413024902, | |
| "logps/rejected": -3.5696024894714355, | |
| "loss": 23.204, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.2180822342634201, | |
| "rewards/margins": 0.029619824141263962, | |
| "rewards/rejected": -0.24770204722881317, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.4185022026431718, | |
| "grad_norm": 57.84255599975586, | |
| "learning_rate": 2.9138484633928818e-05, | |
| "logits/chosen": -1.940320372581482, | |
| "logits/rejected": -1.9845908880233765, | |
| "logps/chosen": -3.0434772968292236, | |
| "logps/rejected": -3.5398964881896973, | |
| "loss": 24.3501, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.2063741683959961, | |
| "rewards/margins": 0.023456847295165062, | |
| "rewards/rejected": -0.2298310250043869, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.42164883574575207, | |
| "grad_norm": 56.995887756347656, | |
| "learning_rate": 2.8942520153810396e-05, | |
| "logits/chosen": -2.0002236366271973, | |
| "logits/rejected": -2.08671498298645, | |
| "logps/chosen": -2.834512710571289, | |
| "logps/rejected": -3.5050129890441895, | |
| "loss": 22.4039, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.18090704083442688, | |
| "rewards/margins": 0.04532923549413681, | |
| "rewards/rejected": -0.2262362688779831, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.4247954688483323, | |
| "grad_norm": 75.65125274658203, | |
| "learning_rate": 2.8745476669231894e-05, | |
| "logits/chosen": -2.020886182785034, | |
| "logits/rejected": -2.111823558807373, | |
| "logps/chosen": -3.5571112632751465, | |
| "logps/rejected": -4.481097221374512, | |
| "loss": 22.9676, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.22384686768054962, | |
| "rewards/margins": 0.04108366742730141, | |
| "rewards/rejected": -0.2649305462837219, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.42794210195091253, | |
| "grad_norm": 77.30415344238281, | |
| "learning_rate": 2.8547377955459704e-05, | |
| "logits/chosen": -1.9961265325546265, | |
| "logits/rejected": -2.0482177734375, | |
| "logps/chosen": -2.892690658569336, | |
| "logps/rejected": -3.2253260612487793, | |
| "loss": 25.6658, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.19483526051044464, | |
| "rewards/margins": 0.01912742853164673, | |
| "rewards/rejected": -0.21396267414093018, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.43108873505349277, | |
| "grad_norm": 49.21062088012695, | |
| "learning_rate": 2.834824791508413e-05, | |
| "logits/chosen": -1.930086374282837, | |
| "logits/rejected": -2.131298542022705, | |
| "logps/chosen": -2.739534854888916, | |
| "logps/rejected": -3.5602822303771973, | |
| "loss": 21.1908, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.17746233940124512, | |
| "rewards/margins": 0.06554970890283585, | |
| "rewards/rejected": -0.24301204085350037, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.434235368156073, | |
| "grad_norm": 64.88590240478516, | |
| "learning_rate": 2.814811057513537e-05, | |
| "logits/chosen": -2.0517029762268066, | |
| "logits/rejected": -2.067883253097534, | |
| "logps/chosen": -2.82458758354187, | |
| "logps/rejected": -3.6670260429382324, | |
| "loss": 21.8595, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.1833667755126953, | |
| "rewards/margins": 0.0560932457447052, | |
| "rewards/rejected": -0.2394600361585617, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.43738200125865323, | |
| "grad_norm": 48.841331481933594, | |
| "learning_rate": 2.7946990084184383e-05, | |
| "logits/chosen": -1.798683524131775, | |
| "logits/rejected": -1.9806129932403564, | |
| "logps/chosen": -3.2995662689208984, | |
| "logps/rejected": -4.0815110206604, | |
| "loss": 22.0918, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.2146444320678711, | |
| "rewards/margins": 0.05965212732553482, | |
| "rewards/rejected": -0.27429652214050293, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.44052863436123346, | |
| "grad_norm": 266.59381103515625, | |
| "learning_rate": 2.7744910709429104e-05, | |
| "logits/chosen": -1.800355315208435, | |
| "logits/rejected": -1.9262745380401611, | |
| "logps/chosen": -3.308371067047119, | |
| "logps/rejected": -4.3786821365356445, | |
| "loss": 22.6616, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.20500688254833221, | |
| "rewards/margins": 0.07705695927143097, | |
| "rewards/rejected": -0.2820638120174408, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4436752674638137, | |
| "grad_norm": 45.74457550048828, | |
| "learning_rate": 2.754189683376641e-05, | |
| "logits/chosen": -1.8245214223861694, | |
| "logits/rejected": -1.9188095331192017, | |
| "logps/chosen": -2.6574292182922363, | |
| "logps/rejected": -3.3347110748291016, | |
| "loss": 21.6472, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.1792256087064743, | |
| "rewards/margins": 0.054762959480285645, | |
| "rewards/rejected": -0.23398856818675995, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.446821900566394, | |
| "grad_norm": 82.67216491699219, | |
| "learning_rate": 2.7337972952850047e-05, | |
| "logits/chosen": -1.764173150062561, | |
| "logits/rejected": -1.9260650873184204, | |
| "logps/chosen": -2.8055293560028076, | |
| "logps/rejected": -3.9603447914123535, | |
| "loss": 21.7022, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.19627173244953156, | |
| "rewards/margins": 0.07794789969921112, | |
| "rewards/rejected": -0.2742196321487427, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.4499685336689742, | |
| "grad_norm": 63.396240234375, | |
| "learning_rate": 2.713316367213499e-05, | |
| "logits/chosen": -1.6747219562530518, | |
| "logits/rejected": -1.8347587585449219, | |
| "logps/chosen": -2.9625911712646484, | |
| "logps/rejected": -3.7656357288360596, | |
| "loss": 22.6149, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.21037223935127258, | |
| "rewards/margins": 0.05833571031689644, | |
| "rewards/rejected": -0.26870793104171753, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.45311516677155445, | |
| "grad_norm": 118.00112915039062, | |
| "learning_rate": 2.692749370390855e-05, | |
| "logits/chosen": -1.7990179061889648, | |
| "logits/rejected": -1.8915067911148071, | |
| "logps/chosen": -3.0249316692352295, | |
| "logps/rejected": -4.06134033203125, | |
| "loss": 23.4425, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.21054935455322266, | |
| "rewards/margins": 0.05246324464678764, | |
| "rewards/rejected": -0.2630125880241394, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.4562617998741347, | |
| "grad_norm": 64.52631378173828, | |
| "learning_rate": 2.6720987864308603e-05, | |
| "logits/chosen": -1.695908546447754, | |
| "logits/rejected": -1.7583353519439697, | |
| "logps/chosen": -2.815432548522949, | |
| "logps/rejected": -4.123710632324219, | |
| "loss": 21.0095, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.1960502415895462, | |
| "rewards/margins": 0.08241166174411774, | |
| "rewards/rejected": -0.27846187353134155, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.4594084329767149, | |
| "grad_norm": 59.4410285949707, | |
| "learning_rate": 2.6513671070329244e-05, | |
| "logits/chosen": -1.7788522243499756, | |
| "logits/rejected": -1.8245208263397217, | |
| "logps/chosen": -3.012934446334839, | |
| "logps/rejected": -4.003429412841797, | |
| "loss": 21.1484, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.2150099277496338, | |
| "rewards/margins": 0.07829871028661728, | |
| "rewards/rejected": -0.2933086156845093, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.46255506607929514, | |
| "grad_norm": 84.89627075195312, | |
| "learning_rate": 2.630556833681434e-05, | |
| "logits/chosen": -1.738438606262207, | |
| "logits/rejected": -1.8424345254898071, | |
| "logps/chosen": -2.7983458042144775, | |
| "logps/rejected": -4.087245941162109, | |
| "loss": 19.2453, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.19751907885074615, | |
| "rewards/margins": 0.09776587784290314, | |
| "rewards/rejected": -0.2952849566936493, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.4657016991818754, | |
| "grad_norm": 101.38806915283203, | |
| "learning_rate": 2.609670477343921e-05, | |
| "logits/chosen": -1.6957628726959229, | |
| "logits/rejected": -1.825757384300232, | |
| "logps/chosen": -4.030215263366699, | |
| "logps/rejected": -5.008100509643555, | |
| "loss": 22.1478, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.30844345688819885, | |
| "rewards/margins": 0.0614703968167305, | |
| "rewards/rejected": -0.36991381645202637, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.46884833228445566, | |
| "grad_norm": 101.18181610107422, | |
| "learning_rate": 2.5887105581680905e-05, | |
| "logits/chosen": -1.7838348150253296, | |
| "logits/rejected": -1.7674500942230225, | |
| "logps/chosen": -4.438131809234619, | |
| "logps/rejected": -5.542893886566162, | |
| "loss": 23.806, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.36128634214401245, | |
| "rewards/margins": 0.07241909205913544, | |
| "rewards/rejected": -0.43370547890663147, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.4719949653870359, | |
| "grad_norm": 89.2279052734375, | |
| "learning_rate": 2.567679605177739e-05, | |
| "logits/chosen": -1.7873433828353882, | |
| "logits/rejected": -1.831865906715393, | |
| "logps/chosen": -4.315898895263672, | |
| "logps/rejected": -5.43391227722168, | |
| "loss": 20.4258, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.32439109683036804, | |
| "rewards/margins": 0.09124849736690521, | |
| "rewards/rejected": -0.41563957929611206, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.4751415984896161, | |
| "grad_norm": 68.27491760253906, | |
| "learning_rate": 2.5465801559676033e-05, | |
| "logits/chosen": -1.716103196144104, | |
| "logits/rejected": -1.744837999343872, | |
| "logps/chosen": -3.913160800933838, | |
| "logps/rejected": -5.709442615509033, | |
| "loss": 19.3215, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.30374833941459656, | |
| "rewards/margins": 0.12692494690418243, | |
| "rewards/rejected": -0.4306732714176178, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.47828823159219636, | |
| "grad_norm": 149.6294708251953, | |
| "learning_rate": 2.525414756397174e-05, | |
| "logits/chosen": -1.7440742254257202, | |
| "logits/rejected": -1.8239097595214844, | |
| "logps/chosen": -3.586292266845703, | |
| "logps/rejected": -4.596356391906738, | |
| "loss": 19.9662, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.2702713906764984, | |
| "rewards/margins": 0.08218260109424591, | |
| "rewards/rejected": -0.3524540364742279, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.4814348646947766, | |
| "grad_norm": 102.944580078125, | |
| "learning_rate": 2.504185960283512e-05, | |
| "logits/chosen": -1.7996543645858765, | |
| "logits/rejected": -1.8109557628631592, | |
| "logps/chosen": -4.447735786437988, | |
| "logps/rejected": -5.870986461639404, | |
| "loss": 20.4207, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.35062670707702637, | |
| "rewards/margins": 0.09269314259290695, | |
| "rewards/rejected": -0.4433198869228363, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.4845814977973568, | |
| "grad_norm": 128.53907775878906, | |
| "learning_rate": 2.482896329093106e-05, | |
| "logits/chosen": -1.9051790237426758, | |
| "logits/rejected": -1.9270706176757812, | |
| "logps/chosen": -5.1721906661987305, | |
| "logps/rejected": -6.744166374206543, | |
| "loss": 19.0615, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.4306749701499939, | |
| "rewards/margins": 0.1142655462026596, | |
| "rewards/rejected": -0.5449405312538147, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.48772813089993705, | |
| "grad_norm": 123.44400024414062, | |
| "learning_rate": 2.4615484316328023e-05, | |
| "logits/chosen": -1.8487358093261719, | |
| "logits/rejected": -1.8219711780548096, | |
| "logps/chosen": -5.741638660430908, | |
| "logps/rejected": -7.048303127288818, | |
| "loss": 22.6075, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.4748842120170593, | |
| "rewards/margins": 0.09859482944011688, | |
| "rewards/rejected": -0.5734790563583374, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.4908747640025173, | |
| "grad_norm": 97.28683471679688, | |
| "learning_rate": 2.440144843739857e-05, | |
| "logits/chosen": -1.8166711330413818, | |
| "logits/rejected": -1.856359839439392, | |
| "logps/chosen": -6.369978904724121, | |
| "logps/rejected": -7.745943546295166, | |
| "loss": 21.1624, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.5159797072410583, | |
| "rewards/margins": 0.09467221796512604, | |
| "rewards/rejected": -0.610651969909668, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.49402139710509757, | |
| "grad_norm": 94.76971435546875, | |
| "learning_rate": 2.4186881479711338e-05, | |
| "logits/chosen": -1.8901869058609009, | |
| "logits/rejected": -1.996917724609375, | |
| "logps/chosen": -5.151943206787109, | |
| "logps/rejected": -6.655333518981934, | |
| "loss": 17.5696, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3987768888473511, | |
| "rewards/margins": 0.11971308290958405, | |
| "rewards/rejected": -0.5184900164604187, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.4971680302076778, | |
| "grad_norm": 362.07489013671875, | |
| "learning_rate": 2.397180933291491e-05, | |
| "logits/chosen": -1.6789305210113525, | |
| "logits/rejected": -1.75827157497406, | |
| "logps/chosen": -4.5332841873168945, | |
| "logps/rejected": -5.266444206237793, | |
| "loss": 22.7215, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.3525087535381317, | |
| "rewards/margins": 0.07219593226909637, | |
| "rewards/rejected": -0.42470473051071167, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.500314663310258, | |
| "grad_norm": 181.0984344482422, | |
| "learning_rate": 2.375625794761401e-05, | |
| "logits/chosen": -1.769201636314392, | |
| "logits/rejected": -1.7219161987304688, | |
| "logps/chosen": -4.633937358856201, | |
| "logps/rejected": -5.043046474456787, | |
| "loss": 26.0541, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.3703366816043854, | |
| "rewards/margins": 0.028562629595398903, | |
| "rewards/rejected": -0.3988993167877197, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.5034612964128382, | |
| "grad_norm": 120.9494857788086, | |
| "learning_rate": 2.3540253332238266e-05, | |
| "logits/chosen": -1.6151552200317383, | |
| "logits/rejected": -1.646795630455017, | |
| "logps/chosen": -4.029574394226074, | |
| "logps/rejected": -5.215254783630371, | |
| "loss": 20.2479, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.314275324344635, | |
| "rewards/margins": 0.08437344431877136, | |
| "rewards/rejected": -0.39864879846572876, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5066079295154186, | |
| "grad_norm": 119.4858169555664, | |
| "learning_rate": 2.3323821549904038e-05, | |
| "logits/chosen": -1.670577049255371, | |
| "logits/rejected": -1.5533939599990845, | |
| "logps/chosen": -3.9187912940979004, | |
| "logps/rejected": -4.743254661560059, | |
| "loss": 23.6037, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.3001677095890045, | |
| "rewards/margins": 0.06169123575091362, | |
| "rewards/rejected": -0.36185896396636963, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.5097545626179988, | |
| "grad_norm": 316.2997741699219, | |
| "learning_rate": 2.310698871526966e-05, | |
| "logits/chosen": -1.5207440853118896, | |
| "logits/rejected": -1.6267799139022827, | |
| "logps/chosen": -3.097418785095215, | |
| "logps/rejected": -4.804646015167236, | |
| "loss": 21.8575, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.22499537467956543, | |
| "rewards/margins": 0.11616162210702896, | |
| "rewards/rejected": -0.3411570191383362, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.512901195720579, | |
| "grad_norm": 78.00189971923828, | |
| "learning_rate": 2.288978099138443e-05, | |
| "logits/chosen": -1.5745933055877686, | |
| "logits/rejected": -1.5564606189727783, | |
| "logps/chosen": -2.8804163932800293, | |
| "logps/rejected": -3.5308539867401123, | |
| "loss": 22.241, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.20254115760326385, | |
| "rewards/margins": 0.05405501648783684, | |
| "rewards/rejected": -0.2565961480140686, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.5160478288231592, | |
| "grad_norm": 118.51643371582031, | |
| "learning_rate": 2.267222458653179e-05, | |
| "logits/chosen": -1.5091989040374756, | |
| "logits/rejected": -1.6645923852920532, | |
| "logps/chosen": -3.255237579345703, | |
| "logps/rejected": -4.126650333404541, | |
| "loss": 22.0187, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.23314771056175232, | |
| "rewards/margins": 0.06177164986729622, | |
| "rewards/rejected": -0.29491934180259705, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.5191944619257395, | |
| "grad_norm": 68.80047607421875, | |
| "learning_rate": 2.245434575106702e-05, | |
| "logits/chosen": -1.525356411933899, | |
| "logits/rejected": -1.701436996459961, | |
| "logps/chosen": -3.166797161102295, | |
| "logps/rejected": -4.742985248565674, | |
| "loss": 20.3686, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.2261020839214325, | |
| "rewards/margins": 0.08829782903194427, | |
| "rewards/rejected": -0.3143998980522156, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.5223410950283197, | |
| "grad_norm": 73.1375503540039, | |
| "learning_rate": 2.223617077424988e-05, | |
| "logits/chosen": -1.6771663427352905, | |
| "logits/rejected": -1.7121098041534424, | |
| "logps/chosen": -3.020296573638916, | |
| "logps/rejected": -4.426422119140625, | |
| "loss": 20.0836, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.21738722920417786, | |
| "rewards/margins": 0.09777109324932098, | |
| "rewards/rejected": -0.31515830755233765, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.5254877281309, | |
| "grad_norm": 76.68984985351562, | |
| "learning_rate": 2.2017725981072536e-05, | |
| "logits/chosen": -1.4603363275527954, | |
| "logits/rejected": -1.5595886707305908, | |
| "logps/chosen": -3.6973624229431152, | |
| "logps/rejected": -5.027807712554932, | |
| "loss": 20.512, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.2720819115638733, | |
| "rewards/margins": 0.08642515540122986, | |
| "rewards/rejected": -0.35850709676742554, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.5286343612334802, | |
| "grad_norm": 122.99668884277344, | |
| "learning_rate": 2.1799037729083213e-05, | |
| "logits/chosen": -1.5949891805648804, | |
| "logits/rejected": -1.7137962579727173, | |
| "logps/chosen": -3.5109829902648926, | |
| "logps/rejected": -4.95348596572876, | |
| "loss": 21.517, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.27030450105667114, | |
| "rewards/margins": 0.09910550713539124, | |
| "rewards/rejected": -0.36940997838974, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.5317809943360604, | |
| "grad_norm": 65.23582458496094, | |
| "learning_rate": 2.1580132405205862e-05, | |
| "logits/chosen": -1.4871020317077637, | |
| "logits/rejected": -1.5624678134918213, | |
| "logps/chosen": -4.474881172180176, | |
| "logps/rejected": -5.375269412994385, | |
| "loss": 23.3138, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.3389451503753662, | |
| "rewards/margins": 0.06582923233509064, | |
| "rewards/rejected": -0.40477436780929565, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.5349276274386406, | |
| "grad_norm": 175.08432006835938, | |
| "learning_rate": 2.1361036422556337e-05, | |
| "logits/chosen": -1.5353832244873047, | |
| "logits/rejected": -1.596407175064087, | |
| "logps/chosen": -3.814873218536377, | |
| "logps/rejected": -4.92036771774292, | |
| "loss": 21.5442, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.2893931567668915, | |
| "rewards/margins": 0.07075894623994827, | |
| "rewards/rejected": -0.36015206575393677, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.5380742605412209, | |
| "grad_norm": 64.21197509765625, | |
| "learning_rate": 2.1141776217255365e-05, | |
| "logits/chosen": -1.567317247390747, | |
| "logits/rejected": -1.5555747747421265, | |
| "logps/chosen": -3.8906242847442627, | |
| "logps/rejected": -4.897479057312012, | |
| "loss": 21.8379, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.29526472091674805, | |
| "rewards/margins": 0.06354343891143799, | |
| "rewards/rejected": -0.35880815982818604, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.5412208936438011, | |
| "grad_norm": 104.57052612304688, | |
| "learning_rate": 2.0922378245238787e-05, | |
| "logits/chosen": -1.5869696140289307, | |
| "logits/rejected": -1.6049997806549072, | |
| "logps/chosen": -3.8140482902526855, | |
| "logps/rejected": -4.755133628845215, | |
| "loss": 23.1968, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.29255491495132446, | |
| "rewards/margins": 0.052004069089889526, | |
| "rewards/rejected": -0.3445590138435364, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.5443675267463813, | |
| "grad_norm": 92.2053451538086, | |
| "learning_rate": 2.070286897906537e-05, | |
| "logits/chosen": -1.602929711341858, | |
| "logits/rejected": -1.6071062088012695, | |
| "logps/chosen": -3.990319013595581, | |
| "logps/rejected": -5.2248215675354, | |
| "loss": 20.3706, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.3073904812335968, | |
| "rewards/margins": 0.09087739139795303, | |
| "rewards/rejected": -0.39826786518096924, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.5475141598489616, | |
| "grad_norm": 83.128662109375, | |
| "learning_rate": 2.0483274904722647e-05, | |
| "logits/chosen": -1.7051680088043213, | |
| "logits/rejected": -1.6087182760238647, | |
| "logps/chosen": -3.986027956008911, | |
| "logps/rejected": -4.851881980895996, | |
| "loss": 21.4848, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.30843600630760193, | |
| "rewards/margins": 0.06898938864469528, | |
| "rewards/rejected": -0.3774254322052002, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.5506607929515418, | |
| "grad_norm": 62.2298583984375, | |
| "learning_rate": 2.026362251843109e-05, | |
| "logits/chosen": -1.6034513711929321, | |
| "logits/rejected": -1.699464201927185, | |
| "logps/chosen": -3.4193336963653564, | |
| "logps/rejected": -4.403960227966309, | |
| "loss": 21.3108, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.2610613703727722, | |
| "rewards/margins": 0.08181565254926682, | |
| "rewards/rejected": -0.34287700057029724, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.5538074260541221, | |
| "grad_norm": 88.62437438964844, | |
| "learning_rate": 2.004393832344711e-05, | |
| "logits/chosen": -1.6719697713851929, | |
| "logits/rejected": -1.5851457118988037, | |
| "logps/chosen": -3.8325066566467285, | |
| "logps/rejected": -5.3017473220825195, | |
| "loss": 19.635, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.3032756447792053, | |
| "rewards/margins": 0.09231220185756683, | |
| "rewards/rejected": -0.39558783173561096, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5569540591567024, | |
| "grad_norm": 64.06165313720703, | |
| "learning_rate": 1.9824248826865124e-05, | |
| "logits/chosen": -1.5828460454940796, | |
| "logits/rejected": -1.6327168941497803, | |
| "logps/chosen": -4.681789398193359, | |
| "logps/rejected": -6.566616058349609, | |
| "loss": 18.3853, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -0.3667379915714264, | |
| "rewards/margins": 0.12741395831108093, | |
| "rewards/rejected": -0.49415192008018494, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.5601006922592826, | |
| "grad_norm": 204.93890380859375, | |
| "learning_rate": 1.9604580536419254e-05, | |
| "logits/chosen": -1.572584867477417, | |
| "logits/rejected": -1.6088756322860718, | |
| "logps/chosen": -5.441628456115723, | |
| "logps/rejected": -7.085760593414307, | |
| "loss": 24.9097, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.45653265714645386, | |
| "rewards/margins": 0.0925588458776474, | |
| "rewards/rejected": -0.5490914583206177, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.5632473253618628, | |
| "grad_norm": 162.79714965820312, | |
| "learning_rate": 1.93849599572849e-05, | |
| "logits/chosen": -1.6288610696792603, | |
| "logits/rejected": -1.6398794651031494, | |
| "logps/chosen": -5.213116645812988, | |
| "logps/rejected": -6.9830803871154785, | |
| "loss": 20.22, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.42777156829833984, | |
| "rewards/margins": 0.12980665266513824, | |
| "rewards/rejected": -0.5575782060623169, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.5663939584644431, | |
| "grad_norm": 75.16659545898438, | |
| "learning_rate": 1.916541358888062e-05, | |
| "logits/chosen": -1.6041675806045532, | |
| "logits/rejected": -1.6970984935760498, | |
| "logps/chosen": -4.644831657409668, | |
| "logps/rejected": -5.80092716217041, | |
| "loss": 20.4964, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.37974128127098083, | |
| "rewards/margins": 0.09219308942556381, | |
| "rewards/rejected": -0.47193440794944763, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5695405915670233, | |
| "grad_norm": 110.90229797363281, | |
| "learning_rate": 1.8945967921670676e-05, | |
| "logits/chosen": -1.619327187538147, | |
| "logits/rejected": -1.6541610956192017, | |
| "logps/chosen": -5.146854400634766, | |
| "logps/rejected": -6.011466026306152, | |
| "loss": 22.4066, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.41492849588394165, | |
| "rewards/margins": 0.07109946012496948, | |
| "rewards/rejected": -0.48602795600891113, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.5726872246696035, | |
| "grad_norm": 139.65293884277344, | |
| "learning_rate": 1.872664943396875e-05, | |
| "logits/chosen": -1.6764265298843384, | |
| "logits/rejected": -1.6785293817520142, | |
| "logps/chosen": -4.107344150543213, | |
| "logps/rejected": -5.6308698654174805, | |
| "loss": 20.0103, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3305855095386505, | |
| "rewards/margins": 0.11647170782089233, | |
| "rewards/rejected": -0.44705715775489807, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.5758338577721838, | |
| "grad_norm": 147.52713012695312, | |
| "learning_rate": 1.8507484588743025e-05, | |
| "logits/chosen": -1.7002742290496826, | |
| "logits/rejected": -1.7680556774139404, | |
| "logps/chosen": -4.6784772872924805, | |
| "logps/rejected": -5.973324775695801, | |
| "loss": 21.0769, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3756680190563202, | |
| "rewards/margins": 0.09194694459438324, | |
| "rewards/rejected": -0.4676149785518646, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.578980490874764, | |
| "grad_norm": 71.16407012939453, | |
| "learning_rate": 1.828849983042321e-05, | |
| "logits/chosen": -1.7075554132461548, | |
| "logits/rejected": -1.6953094005584717, | |
| "logps/chosen": -4.460357666015625, | |
| "logps/rejected": -5.521221160888672, | |
| "loss": 21.7677, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.35953736305236816, | |
| "rewards/margins": 0.08199813961982727, | |
| "rewards/rejected": -0.44153547286987305, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.5821271239773442, | |
| "grad_norm": 114.27317810058594, | |
| "learning_rate": 1.8069721581709697e-05, | |
| "logits/chosen": -1.6304935216903687, | |
| "logits/rejected": -1.6967551708221436, | |
| "logps/chosen": -4.526963233947754, | |
| "logps/rejected": -5.7123494148254395, | |
| "loss": 21.5069, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.35851508378982544, | |
| "rewards/margins": 0.07467497885227203, | |
| "rewards/rejected": -0.4331900477409363, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.5852737570799245, | |
| "grad_norm": 71.74990844726562, | |
| "learning_rate": 1.785117624038546e-05, | |
| "logits/chosen": -1.704414963722229, | |
| "logits/rejected": -1.7506616115570068, | |
| "logps/chosen": -5.388034820556641, | |
| "logps/rejected": -6.3465657234191895, | |
| "loss": 21.8977, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.4279704689979553, | |
| "rewards/margins": 0.05819786712527275, | |
| "rewards/rejected": -0.48616838455200195, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.5884203901825047, | |
| "grad_norm": 78.14295196533203, | |
| "learning_rate": 1.763289017613085e-05, | |
| "logits/chosen": -1.6152721643447876, | |
| "logits/rejected": -1.640634536743164, | |
| "logps/chosen": -4.3263750076293945, | |
| "logps/rejected": -5.279467582702637, | |
| "loss": 21.887, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.34328222274780273, | |
| "rewards/margins": 0.07140573114156723, | |
| "rewards/rejected": -0.41468796133995056, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.5915670232850849, | |
| "grad_norm": 219.88279724121094, | |
| "learning_rate": 1.741488972734184e-05, | |
| "logits/chosen": -1.5857679843902588, | |
| "logits/rejected": -1.65940260887146, | |
| "logps/chosen": -4.669988632202148, | |
| "logps/rejected": -6.202586650848389, | |
| "loss": 20.5667, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.3540535271167755, | |
| "rewards/margins": 0.10686901956796646, | |
| "rewards/rejected": -0.46092256903648376, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.5947136563876652, | |
| "grad_norm": 90.00337219238281, | |
| "learning_rate": 1.7197201197952065e-05, | |
| "logits/chosen": -1.5206947326660156, | |
| "logits/rejected": -1.53545343875885, | |
| "logps/chosen": -4.086690902709961, | |
| "logps/rejected": -4.490893363952637, | |
| "loss": 25.9453, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.30406898260116577, | |
| "rewards/margins": 0.034761372953653336, | |
| "rewards/rejected": -0.3388303220272064, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.5978602894902454, | |
| "grad_norm": 79.93099212646484, | |
| "learning_rate": 1.6979850854258938e-05, | |
| "logits/chosen": -1.3608052730560303, | |
| "logits/rejected": -1.4760938882827759, | |
| "logps/chosen": -3.6326985359191895, | |
| "logps/rejected": -5.186118125915527, | |
| "loss": 20.6064, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.2708989083766937, | |
| "rewards/margins": 0.10907317698001862, | |
| "rewards/rejected": -0.37997210025787354, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.6010069225928257, | |
| "grad_norm": 54.11685562133789, | |
| "learning_rate": 1.6762864921754426e-05, | |
| "logits/chosen": -1.3788961172103882, | |
| "logits/rejected": -1.4954605102539062, | |
| "logps/chosen": -3.189054250717163, | |
| "logps/rejected": -4.365990161895752, | |
| "loss": 20.0193, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.23121857643127441, | |
| "rewards/margins": 0.09906688332557678, | |
| "rewards/rejected": -0.3302854597568512, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.604153555695406, | |
| "grad_norm": 78.23949432373047, | |
| "learning_rate": 1.654626958196059e-05, | |
| "logits/chosen": -1.509225606918335, | |
| "logits/rejected": -1.4755313396453857, | |
| "logps/chosen": -4.190049648284912, | |
| "logps/rejected": -5.553238391876221, | |
| "loss": 18.6024, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.3084833025932312, | |
| "rewards/margins": 0.10999338328838348, | |
| "rewards/rejected": -0.4184766709804535, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.6073001887979862, | |
| "grad_norm": 46.66254806518555, | |
| "learning_rate": 1.633009096927062e-05, | |
| "logits/chosen": -1.5157467126846313, | |
| "logits/rejected": -1.6129589080810547, | |
| "logps/chosen": -3.3808016777038574, | |
| "logps/rejected": -4.686802864074707, | |
| "loss": 18.8156, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.26101940870285034, | |
| "rewards/margins": 0.10853584110736847, | |
| "rewards/rejected": -0.3695552349090576, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.6104468219005664, | |
| "grad_norm": 76.67229461669922, | |
| "learning_rate": 1.6114355167795407e-05, | |
| "logits/chosen": -1.507666826248169, | |
| "logits/rejected": -1.642401099205017, | |
| "logps/chosen": -4.4493513107299805, | |
| "logps/rejected": -5.8435235023498535, | |
| "loss": 20.6314, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.32613635063171387, | |
| "rewards/margins": 0.10264672338962555, | |
| "rewards/rejected": -0.42878302931785583, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.6135934550031467, | |
| "grad_norm": 97.02481842041016, | |
| "learning_rate": 1.5899088208216215e-05, | |
| "logits/chosen": -1.501697301864624, | |
| "logits/rejected": -1.594618558883667, | |
| "logps/chosen": -4.284520149230957, | |
| "logps/rejected": -4.852963447570801, | |
| "loss": 26.4688, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.33568352460861206, | |
| "rewards/margins": 0.03864779695868492, | |
| "rewards/rejected": -0.37433135509490967, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.6167400881057269, | |
| "grad_norm": 176.32850646972656, | |
| "learning_rate": 1.568431606464388e-05, | |
| "logits/chosen": -1.595866084098816, | |
| "logits/rejected": -1.6668930053710938, | |
| "logps/chosen": -4.345438480377197, | |
| "logps/rejected": -5.242307662963867, | |
| "loss": 21.0145, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3100406527519226, | |
| "rewards/margins": 0.0767713412642479, | |
| "rewards/rejected": -0.3868120312690735, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.6198867212083071, | |
| "grad_norm": 76.86431884765625, | |
| "learning_rate": 1.547006465148471e-05, | |
| "logits/chosen": -1.5940501689910889, | |
| "logits/rejected": -1.7789547443389893, | |
| "logps/chosen": -4.4857177734375, | |
| "logps/rejected": -5.875302314758301, | |
| "loss": 21.8847, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3244941830635071, | |
| "rewards/margins": 0.08251677453517914, | |
| "rewards/rejected": -0.4070109724998474, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.6230333543108874, | |
| "grad_norm": 49.81745147705078, | |
| "learning_rate": 1.5256359820313718e-05, | |
| "logits/chosen": -1.550085425376892, | |
| "logits/rejected": -1.5959933996200562, | |
| "logps/chosen": -3.699030637741089, | |
| "logps/rejected": -4.6470842361450195, | |
| "loss": 20.7306, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.2788650095462799, | |
| "rewards/margins": 0.0799705758690834, | |
| "rewards/rejected": -0.3588356077671051, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.6261799874134676, | |
| "grad_norm": 81.01653289794922, | |
| "learning_rate": 1.5043227356755292e-05, | |
| "logits/chosen": -1.58163321018219, | |
| "logits/rejected": -1.663260817527771, | |
| "logps/chosen": -4.869448661804199, | |
| "logps/rejected": -5.365525245666504, | |
| "loss": 24.1646, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.34130221605300903, | |
| "rewards/margins": 0.04551910609006882, | |
| "rewards/rejected": -0.38682132959365845, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.6293266205160478, | |
| "grad_norm": 101.5945053100586, | |
| "learning_rate": 1.4830692977371985e-05, | |
| "logits/chosen": -1.747009038925171, | |
| "logits/rejected": -1.7761609554290771, | |
| "logps/chosen": -4.585317134857178, | |
| "logps/rejected": -5.033480644226074, | |
| "loss": 23.2309, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.3538682162761688, | |
| "rewards/margins": 0.037090349942445755, | |
| "rewards/rejected": -0.39095860719680786, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.632473253618628, | |
| "grad_norm": 55.57672882080078, | |
| "learning_rate": 1.4618782326561483e-05, | |
| "logits/chosen": -1.7331736087799072, | |
| "logits/rejected": -1.771627426147461, | |
| "logps/chosen": -3.9518864154815674, | |
| "logps/rejected": -4.847538948059082, | |
| "loss": 20.4833, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.2925838530063629, | |
| "rewards/margins": 0.0719093531370163, | |
| "rewards/rejected": -0.3644932210445404, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.6356198867212083, | |
| "grad_norm": 75.53394317626953, | |
| "learning_rate": 1.4407520973462408e-05, | |
| "logits/chosen": -1.7358888387680054, | |
| "logits/rejected": -1.7642987966537476, | |
| "logps/chosen": -4.450674057006836, | |
| "logps/rejected": -5.2704572677612305, | |
| "loss": 22.8124, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.3556494116783142, | |
| "rewards/margins": 0.04838743433356285, | |
| "rewards/rejected": -0.40403684973716736, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.6387665198237885, | |
| "grad_norm": 67.8470230102539, | |
| "learning_rate": 1.4196934408869118e-05, | |
| "logits/chosen": -1.8153152465820312, | |
| "logits/rejected": -1.8065166473388672, | |
| "logps/chosen": -5.316075325012207, | |
| "logps/rejected": -6.770912170410156, | |
| "loss": 21.5925, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.3945319950580597, | |
| "rewards/margins": 0.06610045582056046, | |
| "rewards/rejected": -0.46063241362571716, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.6419131529263687, | |
| "grad_norm": 104.53321075439453, | |
| "learning_rate": 1.3987048042155977e-05, | |
| "logits/chosen": -1.6470744609832764, | |
| "logits/rejected": -1.6989984512329102, | |
| "logps/chosen": -4.787189960479736, | |
| "logps/rejected": -5.5443525314331055, | |
| "loss": 22.5867, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.3872155249118805, | |
| "rewards/margins": 0.05858270451426506, | |
| "rewards/rejected": -0.44579824805259705, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.645059786028949, | |
| "grad_norm": 122.49982452392578, | |
| "learning_rate": 1.377788719821149e-05, | |
| "logits/chosen": -1.6421356201171875, | |
| "logits/rejected": -1.702820062637329, | |
| "logps/chosen": -4.435242652893066, | |
| "logps/rejected": -4.579672336578369, | |
| "loss": 25.1424, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.3478389382362366, | |
| "rewards/margins": 0.0215731430798769, | |
| "rewards/rejected": -0.3694121241569519, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.6482064191315292, | |
| "grad_norm": 145.1405487060547, | |
| "learning_rate": 1.3569477114382568e-05, | |
| "logits/chosen": -1.6365470886230469, | |
| "logits/rejected": -1.6954962015151978, | |
| "logps/chosen": -4.985340595245361, | |
| "logps/rejected": -5.898791313171387, | |
| "loss": 21.7627, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.385195255279541, | |
| "rewards/margins": 0.05228766053915024, | |
| "rewards/rejected": -0.43748289346694946, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.6513530522341096, | |
| "grad_norm": 82.04701232910156, | |
| "learning_rate": 1.3361842937429436e-05, | |
| "logits/chosen": -1.6654088497161865, | |
| "logits/rejected": -1.732187032699585, | |
| "logps/chosen": -4.262317180633545, | |
| "logps/rejected": -5.410677909851074, | |
| "loss": 20.2359, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3416784703731537, | |
| "rewards/margins": 0.08638517558574677, | |
| "rewards/rejected": -0.42806363105773926, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.6544996853366898, | |
| "grad_norm": 95.95136260986328, | |
| "learning_rate": 1.3155009720491368e-05, | |
| "logits/chosen": -1.5801721811294556, | |
| "logits/rejected": -1.5603923797607422, | |
| "logps/chosen": -5.278650760650635, | |
| "logps/rejected": -6.190367698669434, | |
| "loss": 22.4881, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.3998781740665436, | |
| "rewards/margins": 0.06602592766284943, | |
| "rewards/rejected": -0.4659040868282318, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.65764631843927, | |
| "grad_norm": 60.0530891418457, | |
| "learning_rate": 1.2949002420063828e-05, | |
| "logits/chosen": -1.6326820850372314, | |
| "logits/rejected": -1.720810890197754, | |
| "logps/chosen": -4.082489967346191, | |
| "logps/rejected": -5.006215572357178, | |
| "loss": 21.0105, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.32317107915878296, | |
| "rewards/margins": 0.07444654405117035, | |
| "rewards/rejected": -0.3976176679134369, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.6607929515418502, | |
| "grad_norm": 221.81906127929688, | |
| "learning_rate": 1.2743845892987183e-05, | |
| "logits/chosen": -1.6526765823364258, | |
| "logits/rejected": -1.697488784790039, | |
| "logps/chosen": -4.53380823135376, | |
| "logps/rejected": -5.771850109100342, | |
| "loss": 23.2634, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.34263211488723755, | |
| "rewards/margins": 0.07287438213825226, | |
| "rewards/rejected": -0.4155064523220062, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.6639395846444305, | |
| "grad_norm": 137.2283172607422, | |
| "learning_rate": 1.2539564893447489e-05, | |
| "logits/chosen": -1.631956696510315, | |
| "logits/rejected": -1.654306173324585, | |
| "logps/chosen": -4.1559600830078125, | |
| "logps/rejected": -5.033182621002197, | |
| "loss": 22.6183, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.32424792647361755, | |
| "rewards/margins": 0.06618380546569824, | |
| "rewards/rejected": -0.3904317319393158, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.6670862177470107, | |
| "grad_norm": 72.95520782470703, | |
| "learning_rate": 1.2336184069989663e-05, | |
| "logits/chosen": -1.670440435409546, | |
| "logits/rejected": -1.6872297525405884, | |
| "logps/chosen": -3.9552032947540283, | |
| "logps/rejected": -5.303035259246826, | |
| "loss": 19.5681, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.31223589181900024, | |
| "rewards/margins": 0.09164074063301086, | |
| "rewards/rejected": -0.4038766026496887, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.6702328508495909, | |
| "grad_norm": 90.91898345947266, | |
| "learning_rate": 1.2133727962543356e-05, | |
| "logits/chosen": -1.6696465015411377, | |
| "logits/rejected": -1.6963016986846924, | |
| "logps/chosen": -4.434679985046387, | |
| "logps/rejected": -5.158357620239258, | |
| "loss": 21.8675, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.3489730954170227, | |
| "rewards/margins": 0.05557180196046829, | |
| "rewards/rejected": -0.4045449197292328, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.6733794839521712, | |
| "grad_norm": 185.79261779785156, | |
| "learning_rate": 1.193222099946202e-05, | |
| "logits/chosen": -1.6571991443634033, | |
| "logits/rejected": -1.7073132991790771, | |
| "logps/chosen": -4.607517242431641, | |
| "logps/rejected": -5.376668930053711, | |
| "loss": 22.3462, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.35802438855171204, | |
| "rewards/margins": 0.0643647164106369, | |
| "rewards/rejected": -0.42238911986351013, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.6765261170547514, | |
| "grad_norm": 71.50703430175781, | |
| "learning_rate": 1.1731687494575319e-05, | |
| "logits/chosen": -1.585889458656311, | |
| "logits/rejected": -1.6507800817489624, | |
| "logps/chosen": -4.845611572265625, | |
| "logps/rejected": -6.422255516052246, | |
| "loss": 18.5681, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.37992939352989197, | |
| "rewards/margins": 0.10727685689926147, | |
| "rewards/rejected": -0.48720628023147583, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.6796727501573316, | |
| "grad_norm": 210.3772430419922, | |
| "learning_rate": 1.153215164425547e-05, | |
| "logits/chosen": -1.5637327432632446, | |
| "logits/rejected": -1.628791093826294, | |
| "logps/chosen": -4.643498420715332, | |
| "logps/rejected": -5.90508508682251, | |
| "loss": 22.429, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.3530879020690918, | |
| "rewards/margins": 0.07370196282863617, | |
| "rewards/rejected": -0.42678990960121155, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.6828193832599119, | |
| "grad_norm": 324.6168212890625, | |
| "learning_rate": 1.133363752449768e-05, | |
| "logits/chosen": -1.6127498149871826, | |
| "logits/rejected": -1.5895841121673584, | |
| "logps/chosen": -3.8858344554901123, | |
| "logps/rejected": -5.141265392303467, | |
| "loss": 18.9867, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.31075209379196167, | |
| "rewards/margins": 0.10046511888504028, | |
| "rewards/rejected": -0.41121721267700195, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.6859660163624921, | |
| "grad_norm": 269.12744140625, | |
| "learning_rate": 1.1136169088015177e-05, | |
| "logits/chosen": -1.5152666568756104, | |
| "logits/rejected": -1.5772387981414795, | |
| "logps/chosen": -4.37540864944458, | |
| "logps/rejected": -5.073463439941406, | |
| "loss": 22.4614, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.3488185405731201, | |
| "rewards/margins": 0.05801800638437271, | |
| "rewards/rejected": -0.40683650970458984, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.6891126494650723, | |
| "grad_norm": 407.53985595703125, | |
| "learning_rate": 1.0939770161349015e-05, | |
| "logits/chosen": -1.604278802871704, | |
| "logits/rejected": -1.6394538879394531, | |
| "logps/chosen": -4.725668907165527, | |
| "logps/rejected": -6.037966728210449, | |
| "loss": 23.0495, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.3864028751850128, | |
| "rewards/margins": 0.09246636927127838, | |
| "rewards/rejected": -0.4788691997528076, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.6922592825676526, | |
| "grad_norm": 65.52562713623047, | |
| "learning_rate": 1.0744464441993205e-05, | |
| "logits/chosen": -1.4906436204910278, | |
| "logits/rejected": -1.570569634437561, | |
| "logps/chosen": -4.404895782470703, | |
| "logps/rejected": -5.454612731933594, | |
| "loss": 21.9146, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.3443445563316345, | |
| "rewards/margins": 0.07481996715068817, | |
| "rewards/rejected": -0.4191645085811615, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.6954059156702328, | |
| "grad_norm": 60.899654388427734, | |
| "learning_rate": 1.0550275495535382e-05, | |
| "logits/chosen": -1.5062484741210938, | |
| "logits/rejected": -1.5998207330703735, | |
| "logps/chosen": -5.046140193939209, | |
| "logps/rejected": -6.212726593017578, | |
| "loss": 22.0906, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3926524817943573, | |
| "rewards/margins": 0.08822645246982574, | |
| "rewards/rejected": -0.48087891936302185, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.6985525487728131, | |
| "grad_norm": 85.36582946777344, | |
| "learning_rate": 1.0357226752813343e-05, | |
| "logits/chosen": -1.48141348361969, | |
| "logits/rejected": -1.532138705253601, | |
| "logps/chosen": -4.955922603607178, | |
| "logps/rejected": -6.1522979736328125, | |
| "loss": 19.2663, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.3955458402633667, | |
| "rewards/margins": 0.09683366119861603, | |
| "rewards/rejected": -0.4923795163631439, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.7016991818753934, | |
| "grad_norm": 92.5035171508789, | |
| "learning_rate": 1.0165341507087922e-05, | |
| "logits/chosen": -1.4898306131362915, | |
| "logits/rejected": -1.589817762374878, | |
| "logps/chosen": -4.877270221710205, | |
| "logps/rejected": -6.326567649841309, | |
| "loss": 21.0751, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.3771550953388214, | |
| "rewards/margins": 0.10272278636693954, | |
| "rewards/rejected": -0.47987785935401917, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.7048458149779736, | |
| "grad_norm": 100.18026733398438, | |
| "learning_rate": 9.974642911232413e-06, | |
| "logits/chosen": -1.5176981687545776, | |
| "logits/rejected": -1.5406978130340576, | |
| "logps/chosen": -5.319207191467285, | |
| "logps/rejected": -6.242737770080566, | |
| "loss": 20.9524, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.4210137724876404, | |
| "rewards/margins": 0.07255946844816208, | |
| "rewards/rejected": -0.49357327818870544, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.7079924480805538, | |
| "grad_norm": 176.3753662109375, | |
| "learning_rate": 9.785153974938912e-06, | |
| "logits/chosen": -1.5830824375152588, | |
| "logits/rejected": -1.6101982593536377, | |
| "logps/chosen": -5.879128456115723, | |
| "logps/rejected": -6.807085990905762, | |
| "loss": 22.111, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.44119253754615784, | |
| "rewards/margins": 0.07570262253284454, | |
| "rewards/rejected": -0.5168951749801636, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.7111390811831341, | |
| "grad_norm": 67.40308380126953, | |
| "learning_rate": 9.596897561942026e-06, | |
| "logits/chosen": -1.463176965713501, | |
| "logits/rejected": -1.4804832935333252, | |
| "logps/chosen": -4.481048107147217, | |
| "logps/rejected": -5.287797451019287, | |
| "loss": 22.1994, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.3575854003429413, | |
| "rewards/margins": 0.06364957243204117, | |
| "rewards/rejected": -0.42123493552207947, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 93.39257049560547, | |
| "learning_rate": 9.409896387260082e-06, | |
| "logits/chosen": -1.4179964065551758, | |
| "logits/rejected": -1.4655730724334717, | |
| "logps/chosen": -4.708760738372803, | |
| "logps/rejected": -6.217686653137207, | |
| "loss": 21.4161, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.37350720167160034, | |
| "rewards/margins": 0.10105752944946289, | |
| "rewards/rejected": -0.47456473112106323, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.7174323473882945, | |
| "grad_norm": 97.41583251953125, | |
| "learning_rate": 9.224173014454372e-06, | |
| "logits/chosen": -1.4397246837615967, | |
| "logits/rejected": -1.4766523838043213, | |
| "logps/chosen": -4.817109107971191, | |
| "logps/rejected": -6.214907169342041, | |
| "loss": 22.7104, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.384985089302063, | |
| "rewards/margins": 0.0952010303735733, | |
| "rewards/rejected": -0.4801861345767975, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.7205789804908748, | |
| "grad_norm": 103.4198989868164, | |
| "learning_rate": 9.039749852906606e-06, | |
| "logits/chosen": -1.368666648864746, | |
| "logits/rejected": -1.4239342212677002, | |
| "logps/chosen": -4.382673740386963, | |
| "logps/rejected": -5.262811183929443, | |
| "loss": 20.8727, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.35242724418640137, | |
| "rewards/margins": 0.075123131275177, | |
| "rewards/rejected": -0.42755040526390076, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.723725613593455, | |
| "grad_norm": 131.38589477539062, | |
| "learning_rate": 8.856649155115002e-06, | |
| "logits/chosen": -1.409711241722107, | |
| "logits/rejected": -1.455235481262207, | |
| "logps/chosen": -4.550191402435303, | |
| "logps/rejected": -5.52540922164917, | |
| "loss": 23.0103, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.3620757460594177, | |
| "rewards/margins": 0.06903600692749023, | |
| "rewards/rejected": -0.43111175298690796, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.7268722466960352, | |
| "grad_norm": 60.0385627746582, | |
| "learning_rate": 8.674893014009311e-06, | |
| "logits/chosen": -1.3705095052719116, | |
| "logits/rejected": -1.4764083623886108, | |
| "logps/chosen": -4.423483848571777, | |
| "logps/rejected": -5.486600875854492, | |
| "loss": 21.3505, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.3566300570964813, | |
| "rewards/margins": 0.07945708185434341, | |
| "rewards/rejected": -0.4360871911048889, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.7300188797986155, | |
| "grad_norm": 80.497802734375, | |
| "learning_rate": 8.494503360285084e-06, | |
| "logits/chosen": -1.406087875366211, | |
| "logits/rejected": -1.5597848892211914, | |
| "logps/chosen": -4.28043270111084, | |
| "logps/rejected": -5.639766216278076, | |
| "loss": 21.9094, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.3276395797729492, | |
| "rewards/margins": 0.07139433920383453, | |
| "rewards/rejected": -0.39903393387794495, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.7331655129011957, | |
| "grad_norm": 106.78560638427734, | |
| "learning_rate": 8.315501959757506e-06, | |
| "logits/chosen": -1.4479920864105225, | |
| "logits/rejected": -1.530386209487915, | |
| "logps/chosen": -5.356269836425781, | |
| "logps/rejected": -6.295357704162598, | |
| "loss": 20.2622, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.39192715287208557, | |
| "rewards/margins": 0.07843243330717087, | |
| "rewards/rejected": -0.47035956382751465, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.7363121460037759, | |
| "grad_norm": 70.2252426147461, | |
| "learning_rate": 8.137910410735119e-06, | |
| "logits/chosen": -1.3913201093673706, | |
| "logits/rejected": -1.5211797952651978, | |
| "logps/chosen": -4.186515808105469, | |
| "logps/rejected": -5.630705833435059, | |
| "loss": 19.5955, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.3086017966270447, | |
| "rewards/margins": 0.1026659831404686, | |
| "rewards/rejected": -0.4112677574157715, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.7394587791063562, | |
| "grad_norm": 192.9811553955078, | |
| "learning_rate": 7.961750141413811e-06, | |
| "logits/chosen": -1.4113714694976807, | |
| "logits/rejected": -1.4863709211349487, | |
| "logps/chosen": -4.043957710266113, | |
| "logps/rejected": -4.903926849365234, | |
| "loss": 21.1766, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.30225270986557007, | |
| "rewards/margins": 0.0713002160191536, | |
| "rewards/rejected": -0.3735528886318207, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.7426054122089364, | |
| "grad_norm": 120.66477966308594, | |
| "learning_rate": 7.787042407291236e-06, | |
| "logits/chosen": -1.4459470510482788, | |
| "logits/rejected": -1.4732497930526733, | |
| "logps/chosen": -4.194180488586426, | |
| "logps/rejected": -5.103634834289551, | |
| "loss": 21.7414, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.33040323853492737, | |
| "rewards/margins": 0.07123108208179474, | |
| "rewards/rejected": -0.4016343653202057, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.7457520453115167, | |
| "grad_norm": 76.2223892211914, | |
| "learning_rate": 7.613808288602185e-06, | |
| "logits/chosen": -1.3101516962051392, | |
| "logits/rejected": -1.410070776939392, | |
| "logps/chosen": -3.897928237915039, | |
| "logps/rejected": -4.853459358215332, | |
| "loss": 20.4936, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.30166110396385193, | |
| "rewards/margins": 0.07222743332386017, | |
| "rewards/rejected": -0.3738885223865509, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.748898678414097, | |
| "grad_norm": 71.2408676147461, | |
| "learning_rate": 7.442068687774983e-06, | |
| "logits/chosen": -1.3900350332260132, | |
| "logits/rejected": -1.4306429624557495, | |
| "logps/chosen": -4.03500509262085, | |
| "logps/rejected": -4.971550941467285, | |
| "loss": 20.8514, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.31287750601768494, | |
| "rewards/margins": 0.07024455070495605, | |
| "rewards/rejected": -0.383122056722641, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.7520453115166772, | |
| "grad_norm": 174.92535400390625, | |
| "learning_rate": 7.271844326909465e-06, | |
| "logits/chosen": -1.3968006372451782, | |
| "logits/rejected": -1.3997862339019775, | |
| "logps/chosen": -4.94242000579834, | |
| "logps/rejected": -5.543642520904541, | |
| "loss": 23.6965, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.36712104082107544, | |
| "rewards/margins": 0.041298940777778625, | |
| "rewards/rejected": -0.40841999650001526, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.7551919446192574, | |
| "grad_norm": 83.12405395507812, | |
| "learning_rate": 7.1031557452765934e-06, | |
| "logits/chosen": -1.4155142307281494, | |
| "logits/rejected": -1.4555690288543701, | |
| "logps/chosen": -3.987143039703369, | |
| "logps/rejected": -5.240988731384277, | |
| "loss": 20.4557, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.312110960483551, | |
| "rewards/margins": 0.08850479125976562, | |
| "rewards/rejected": -0.40061575174331665, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.7583385777218377, | |
| "grad_norm": 82.25894165039062, | |
| "learning_rate": 6.936023296840211e-06, | |
| "logits/chosen": -1.3227570056915283, | |
| "logits/rejected": -1.4542601108551025, | |
| "logps/chosen": -4.520358562469482, | |
| "logps/rejected": -5.628200531005859, | |
| "loss": 21.0717, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3516823947429657, | |
| "rewards/margins": 0.07068557292222977, | |
| "rewards/rejected": -0.42236796021461487, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.7614852108244179, | |
| "grad_norm": 63.93009567260742, | |
| "learning_rate": 6.770467147801152e-06, | |
| "logits/chosen": -1.3352692127227783, | |
| "logits/rejected": -1.4765124320983887, | |
| "logps/chosen": -3.903353452682495, | |
| "logps/rejected": -5.777923107147217, | |
| "loss": 18.1176, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.30418699979782104, | |
| "rewards/margins": 0.1330038160085678, | |
| "rewards/rejected": -0.43719083070755005, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.7646318439269981, | |
| "grad_norm": 123.6090087890625, | |
| "learning_rate": 6.606507274163949e-06, | |
| "logits/chosen": -1.4196144342422485, | |
| "logits/rejected": -1.5160802602767944, | |
| "logps/chosen": -4.3763604164123535, | |
| "logps/rejected": -5.507956504821777, | |
| "loss": 21.3593, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.3455001711845398, | |
| "rewards/margins": 0.08908528089523315, | |
| "rewards/rejected": -0.43458548188209534, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.7677784770295784, | |
| "grad_norm": 79.51527404785156, | |
| "learning_rate": 6.444163459326569e-06, | |
| "logits/chosen": -1.3841816186904907, | |
| "logits/rejected": -1.44673752784729, | |
| "logps/chosen": -4.642246723175049, | |
| "logps/rejected": -5.952216625213623, | |
| "loss": 20.2826, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.37046322226524353, | |
| "rewards/margins": 0.10095451772212982, | |
| "rewards/rejected": -0.47141775488853455, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.7709251101321586, | |
| "grad_norm": 115.33991241455078, | |
| "learning_rate": 6.283455291693303e-06, | |
| "logits/chosen": -1.2804498672485352, | |
| "logits/rejected": -1.336126446723938, | |
| "logps/chosen": -4.530810356140137, | |
| "logps/rejected": -5.714901924133301, | |
| "loss": 23.5811, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.3543975353240967, | |
| "rewards/margins": 0.07916755974292755, | |
| "rewards/rejected": -0.43356508016586304, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.7740717432347388, | |
| "grad_norm": 102.68350219726562, | |
| "learning_rate": 6.124402162311274e-06, | |
| "logits/chosen": -1.3455007076263428, | |
| "logits/rejected": -1.3819594383239746, | |
| "logps/chosen": -4.560150146484375, | |
| "logps/rejected": -5.909863471984863, | |
| "loss": 21.4806, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.36466413736343384, | |
| "rewards/margins": 0.07453545182943344, | |
| "rewards/rejected": -0.4391995966434479, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.777218376337319, | |
| "grad_norm": 78.0007553100586, | |
| "learning_rate": 5.9670232625306955e-06, | |
| "logits/chosen": -1.3267484903335571, | |
| "logits/rejected": -1.3938989639282227, | |
| "logps/chosen": -4.1908979415893555, | |
| "logps/rejected": -4.819875240325928, | |
| "loss": 24.9323, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3211560845375061, | |
| "rewards/margins": 0.054157011210918427, | |
| "rewards/rejected": -0.3753131031990051, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.7803650094398993, | |
| "grad_norm": 910.2023315429688, | |
| "learning_rate": 5.81133758168922e-06, | |
| "logits/chosen": -1.4007585048675537, | |
| "logits/rejected": -1.4542076587677002, | |
| "logps/chosen": -5.091724872589111, | |
| "logps/rejected": -6.444447994232178, | |
| "loss": 20.9318, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.36028310656547546, | |
| "rewards/margins": 0.09358057379722595, | |
| "rewards/rejected": -0.4538637101650238, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.7835116425424795, | |
| "grad_norm": 68.25672912597656, | |
| "learning_rate": 5.6573639048207315e-06, | |
| "logits/chosen": -1.3604391813278198, | |
| "logits/rejected": -1.3182973861694336, | |
| "logps/chosen": -4.621526718139648, | |
| "logps/rejected": -5.245944023132324, | |
| "loss": 21.9955, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.32450687885284424, | |
| "rewards/margins": 0.07323630154132843, | |
| "rewards/rejected": -0.3977431654930115, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.7866582756450597, | |
| "grad_norm": 106.51322937011719, | |
| "learning_rate": 5.5051208103887025e-06, | |
| "logits/chosen": -1.3608815670013428, | |
| "logits/rejected": -1.4448637962341309, | |
| "logps/chosen": -4.045924663543701, | |
| "logps/rejected": -5.57630729675293, | |
| "loss": 20.889, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.3083491623401642, | |
| "rewards/margins": 0.09919731318950653, | |
| "rewards/rejected": -0.40754643082618713, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.78980490874764, | |
| "grad_norm": 70.59749603271484, | |
| "learning_rate": 5.354626668044535e-06, | |
| "logits/chosen": -1.3460859060287476, | |
| "logits/rejected": -1.412706732749939, | |
| "logps/chosen": -3.734891891479492, | |
| "logps/rejected": -4.818475246429443, | |
| "loss": 21.0468, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.2917167842388153, | |
| "rewards/margins": 0.07943135499954224, | |
| "rewards/rejected": -0.37114813923835754, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.7929515418502202, | |
| "grad_norm": 83.2120361328125, | |
| "learning_rate": 5.205899636411078e-06, | |
| "logits/chosen": -1.3329652547836304, | |
| "logits/rejected": -1.3952248096466064, | |
| "logps/chosen": -4.460053443908691, | |
| "logps/rejected": -4.993377685546875, | |
| "loss": 25.4182, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.34508955478668213, | |
| "rewards/margins": 0.03861779719591141, | |
| "rewards/rejected": -0.38370734453201294, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.7960981749528006, | |
| "grad_norm": 74.94086456298828, | |
| "learning_rate": 5.058957660891613e-06, | |
| "logits/chosen": -1.353829264640808, | |
| "logits/rejected": -1.36537766456604, | |
| "logps/chosen": -3.8537967205047607, | |
| "logps/rejected": -4.86336612701416, | |
| "loss": 21.0046, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.29704272747039795, | |
| "rewards/margins": 0.07927833497524261, | |
| "rewards/rejected": -0.376321017742157, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.7992448080553808, | |
| "grad_norm": 68.53548431396484, | |
| "learning_rate": 4.913818471504552e-06, | |
| "logits/chosen": -1.3891483545303345, | |
| "logits/rejected": -1.4956327676773071, | |
| "logps/chosen": -3.83349609375, | |
| "logps/rejected": -5.111277103424072, | |
| "loss": 20.258, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.299319326877594, | |
| "rewards/margins": 0.09995778650045395, | |
| "rewards/rejected": -0.3992771506309509, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.802391441157961, | |
| "grad_norm": 161.29922485351562, | |
| "learning_rate": 4.770499580744125e-06, | |
| "logits/chosen": -1.3398183584213257, | |
| "logits/rejected": -1.3453642129898071, | |
| "logps/chosen": -3.9315247535705566, | |
| "logps/rejected": -4.841611862182617, | |
| "loss": 22.4824, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.30572158098220825, | |
| "rewards/margins": 0.06097061559557915, | |
| "rewards/rejected": -0.3666921854019165, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.8055380742605412, | |
| "grad_norm": 68.45879364013672, | |
| "learning_rate": 4.629018281467357e-06, | |
| "logits/chosen": -1.297154188156128, | |
| "logits/rejected": -1.338921070098877, | |
| "logps/chosen": -3.7794177532196045, | |
| "logps/rejected": -4.509110927581787, | |
| "loss": 21.658, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.2888321876525879, | |
| "rewards/margins": 0.05916588753461838, | |
| "rewards/rejected": -0.3479980528354645, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.8086847073631215, | |
| "grad_norm": 74.77375793457031, | |
| "learning_rate": 4.489391644807462e-06, | |
| "logits/chosen": -1.4385647773742676, | |
| "logits/rejected": -1.5144340991973877, | |
| "logps/chosen": -3.69215726852417, | |
| "logps/rejected": -4.667183876037598, | |
| "loss": 21.0338, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.2842097282409668, | |
| "rewards/margins": 0.07265909761190414, | |
| "rewards/rejected": -0.35686883330345154, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.8118313404657017, | |
| "grad_norm": 78.63387298583984, | |
| "learning_rate": 4.351636518114091e-06, | |
| "logits/chosen": -1.3093000650405884, | |
| "logits/rejected": -1.3893928527832031, | |
| "logps/chosen": -3.599902629852295, | |
| "logps/rejected": -4.570587635040283, | |
| "loss": 22.1635, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.2749050259590149, | |
| "rewards/margins": 0.08025064319372177, | |
| "rewards/rejected": -0.3551556468009949, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.8149779735682819, | |
| "grad_norm": 78.53893280029297, | |
| "learning_rate": 4.215769522920487e-06, | |
| "logits/chosen": -1.2443653345108032, | |
| "logits/rejected": -1.3605782985687256, | |
| "logps/chosen": -3.2713770866394043, | |
| "logps/rejected": -4.569630146026611, | |
| "loss": 20.9369, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.24853453040122986, | |
| "rewards/margins": 0.10017738491296768, | |
| "rewards/rejected": -0.34871190786361694, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.8181246066708622, | |
| "grad_norm": 82.4554672241211, | |
| "learning_rate": 4.0818070529379715e-06, | |
| "logits/chosen": -1.383690357208252, | |
| "logits/rejected": -1.4704560041427612, | |
| "logps/chosen": -4.524319171905518, | |
| "logps/rejected": -5.7077460289001465, | |
| "loss": 21.9118, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.316571444272995, | |
| "rewards/margins": 0.0641409307718277, | |
| "rewards/rejected": -0.3807123601436615, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.8212712397734424, | |
| "grad_norm": 71.1880111694336, | |
| "learning_rate": 3.949765272077843e-06, | |
| "logits/chosen": -1.3107343912124634, | |
| "logits/rejected": -1.3561115264892578, | |
| "logps/chosen": -3.846195936203003, | |
| "logps/rejected": -4.79428768157959, | |
| "loss": 21.0994, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.27155357599258423, | |
| "rewards/margins": 0.07163957506418228, | |
| "rewards/rejected": -0.3431931734085083, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.8244178728760226, | |
| "grad_norm": 50.073204040527344, | |
| "learning_rate": 3.819660112501053e-06, | |
| "logits/chosen": -1.2764497995376587, | |
| "logits/rejected": -1.3517284393310547, | |
| "logps/chosen": -3.5745315551757812, | |
| "logps/rejected": -4.921723365783691, | |
| "loss": 19.6469, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.27279648184776306, | |
| "rewards/margins": 0.1019618958234787, | |
| "rewards/rejected": -0.37475839257240295, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.8275645059786029, | |
| "grad_norm": 83.62207794189453, | |
| "learning_rate": 3.6915072726958514e-06, | |
| "logits/chosen": -1.2466180324554443, | |
| "logits/rejected": -1.2861813306808472, | |
| "logps/chosen": -3.430490016937256, | |
| "logps/rejected": -4.824821949005127, | |
| "loss": 20.5161, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.2643739581108093, | |
| "rewards/margins": 0.1028999462723732, | |
| "rewards/rejected": -0.3672739565372467, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.8307111390811831, | |
| "grad_norm": 76.6629638671875, | |
| "learning_rate": 3.5653222155835686e-06, | |
| "logits/chosen": -1.2766977548599243, | |
| "logits/rejected": -1.3114259243011475, | |
| "logps/chosen": -4.222517967224121, | |
| "logps/rejected": -5.029845714569092, | |
| "loss": 22.1218, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.3145274221897125, | |
| "rewards/margins": 0.06165830045938492, | |
| "rewards/rejected": -0.37618574500083923, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.8338577721837633, | |
| "grad_norm": 159.4115447998047, | |
| "learning_rate": 3.4411201666529003e-06, | |
| "logits/chosen": -1.3758924007415771, | |
| "logits/rejected": -1.4244683980941772, | |
| "logps/chosen": -4.457423210144043, | |
| "logps/rejected": -5.342848300933838, | |
| "loss": 23.3834, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.31679028272628784, | |
| "rewards/margins": 0.06267707049846649, | |
| "rewards/rejected": -0.3794673979282379, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.8370044052863436, | |
| "grad_norm": 56.71870803833008, | |
| "learning_rate": 3.3189161121227564e-06, | |
| "logits/chosen": -1.3166803121566772, | |
| "logits/rejected": -1.385522723197937, | |
| "logps/chosen": -3.8323776721954346, | |
| "logps/rejected": -4.732277870178223, | |
| "loss": 23.3384, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.2978932559490204, | |
| "rewards/margins": 0.0644349679350853, | |
| "rewards/rejected": -0.3623282313346863, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.8401510383889238, | |
| "grad_norm": 66.62996673583984, | |
| "learning_rate": 3.198724797134074e-06, | |
| "logits/chosen": -1.2822662591934204, | |
| "logits/rejected": -1.4124181270599365, | |
| "logps/chosen": -3.9724369049072266, | |
| "logps/rejected": -5.0466437339782715, | |
| "loss": 22.4903, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.2994682192802429, | |
| "rewards/margins": 0.0788046196103096, | |
| "rewards/rejected": -0.3782728910446167, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.8432976714915041, | |
| "grad_norm": 70.8177261352539, | |
| "learning_rate": 3.080560723970616e-06, | |
| "logits/chosen": -1.2813329696655273, | |
| "logits/rejected": -1.3586981296539307, | |
| "logps/chosen": -3.6214439868927, | |
| "logps/rejected": -4.637081623077393, | |
| "loss": 20.5515, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.28146275877952576, | |
| "rewards/margins": 0.07804764062166214, | |
| "rewards/rejected": -0.3595103919506073, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.8464443045940844, | |
| "grad_norm": 64.40753173828125, | |
| "learning_rate": 2.96443815030917e-06, | |
| "logits/chosen": -1.3396605253219604, | |
| "logits/rejected": -1.4255945682525635, | |
| "logps/chosen": -3.604154586791992, | |
| "logps/rejected": -4.95128059387207, | |
| "loss": 20.7037, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.2776135206222534, | |
| "rewards/margins": 0.09353432059288025, | |
| "rewards/rejected": -0.37114784121513367, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.8495909376966646, | |
| "grad_norm": 93.99842071533203, | |
| "learning_rate": 2.850371087499195e-06, | |
| "logits/chosen": -1.381260633468628, | |
| "logits/rejected": -1.4631612300872803, | |
| "logps/chosen": -4.883763790130615, | |
| "logps/rejected": -6.07845401763916, | |
| "loss": 21.0858, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.3591059148311615, | |
| "rewards/margins": 0.09570769965648651, | |
| "rewards/rejected": -0.4548136591911316, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.8527375707992448, | |
| "grad_norm": 62.075279235839844, | |
| "learning_rate": 2.7383732988722057e-06, | |
| "logits/chosen": -1.3089946508407593, | |
| "logits/rejected": -1.3634613752365112, | |
| "logps/chosen": -3.7724010944366455, | |
| "logps/rejected": -4.929832458496094, | |
| "loss": 19.0202, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.273120641708374, | |
| "rewards/margins": 0.09602681547403336, | |
| "rewards/rejected": -0.36914747953414917, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.8558842039018251, | |
| "grad_norm": 80.0210189819336, | |
| "learning_rate": 2.6284582980811136e-06, | |
| "logits/chosen": -1.4461333751678467, | |
| "logits/rejected": -1.370339035987854, | |
| "logps/chosen": -4.136780738830566, | |
| "logps/rejected": -5.008397579193115, | |
| "loss": 23.5672, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.3027392327785492, | |
| "rewards/margins": 0.062295325100421906, | |
| "rewards/rejected": -0.3650345206260681, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.8590308370044053, | |
| "grad_norm": 169.91099548339844, | |
| "learning_rate": 2.5206393474696422e-06, | |
| "logits/chosen": -1.2922241687774658, | |
| "logits/rejected": -1.3685882091522217, | |
| "logps/chosen": -3.8860459327697754, | |
| "logps/rejected": -4.820228099822998, | |
| "loss": 20.1345, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.2878992557525635, | |
| "rewards/margins": 0.07816118001937866, | |
| "rewards/rejected": -0.36606043577194214, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.8621774701069855, | |
| "grad_norm": 291.87542724609375, | |
| "learning_rate": 2.4149294564721146e-06, | |
| "logits/chosen": -1.390933632850647, | |
| "logits/rejected": -1.477757215499878, | |
| "logps/chosen": -4.5947346687316895, | |
| "logps/rejected": -5.662859916687012, | |
| "loss": 22.1173, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.32581329345703125, | |
| "rewards/margins": 0.0882103443145752, | |
| "rewards/rejected": -0.4140236973762512, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.8653241032095658, | |
| "grad_norm": 50.774810791015625, | |
| "learning_rate": 2.3113413800437145e-06, | |
| "logits/chosen": -1.3678381443023682, | |
| "logits/rejected": -1.4147788286209106, | |
| "logps/chosen": -4.411424160003662, | |
| "logps/rejected": -5.547976970672607, | |
| "loss": 20.419, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.3136950135231018, | |
| "rewards/margins": 0.08119923621416092, | |
| "rewards/rejected": -0.3948942720890045, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.868470736312146, | |
| "grad_norm": 75.1661605834961, | |
| "learning_rate": 2.2098876171215e-06, | |
| "logits/chosen": -1.2949163913726807, | |
| "logits/rejected": -1.4591166973114014, | |
| "logps/chosen": -3.913958787918091, | |
| "logps/rejected": -4.945563316345215, | |
| "loss": 20.5075, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.30175596475601196, | |
| "rewards/margins": 0.09277000278234482, | |
| "rewards/rejected": -0.394525945186615, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.8716173694147262, | |
| "grad_norm": 116.18523406982422, | |
| "learning_rate": 2.110580409116261e-06, | |
| "logits/chosen": -1.3234283924102783, | |
| "logits/rejected": -1.3651349544525146, | |
| "logps/chosen": -4.782530307769775, | |
| "logps/rejected": -5.800885200500488, | |
| "loss": 22.8406, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3511677384376526, | |
| "rewards/margins": 0.07397367060184479, | |
| "rewards/rejected": -0.4251413345336914, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.8747640025173065, | |
| "grad_norm": 145.46861267089844, | |
| "learning_rate": 2.013431738435465e-06, | |
| "logits/chosen": -1.3332188129425049, | |
| "logits/rejected": -1.4134724140167236, | |
| "logps/chosen": -4.268718242645264, | |
| "logps/rejected": -5.433601379394531, | |
| "loss": 22.5056, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3332800269126892, | |
| "rewards/margins": 0.07072637230157852, | |
| "rewards/rejected": -0.4040064215660095, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.8779106356198867, | |
| "grad_norm": 117.83720397949219, | |
| "learning_rate": 1.9184533270374928e-06, | |
| "logits/chosen": -1.3927792310714722, | |
| "logits/rejected": -1.4590123891830444, | |
| "logps/chosen": -4.519114017486572, | |
| "logps/rejected": -5.810807228088379, | |
| "loss": 21.2018, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.34003710746765137, | |
| "rewards/margins": 0.08822458237409592, | |
| "rewards/rejected": -0.4282616972923279, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.8810572687224669, | |
| "grad_norm": 128.75563049316406, | |
| "learning_rate": 1.8256566350172211e-06, | |
| "logits/chosen": -1.4642970561981201, | |
| "logits/rejected": -1.56011962890625, | |
| "logps/chosen": -5.124087810516357, | |
| "logps/rejected": -6.271437168121338, | |
| "loss": 20.9824, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.34806352853775024, | |
| "rewards/margins": 0.0969148576259613, | |
| "rewards/rejected": -0.44497838616371155, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.8842039018250472, | |
| "grad_norm": 88.87577056884766, | |
| "learning_rate": 1.7350528592232962e-06, | |
| "logits/chosen": -1.3359493017196655, | |
| "logits/rejected": -1.4811887741088867, | |
| "logps/chosen": -4.525036811828613, | |
| "logps/rejected": -5.623012542724609, | |
| "loss": 22.1104, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.3581879138946533, | |
| "rewards/margins": 0.07608196139335632, | |
| "rewards/rejected": -0.43426984548568726, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.8873505349276274, | |
| "grad_norm": 69.19255065917969, | |
| "learning_rate": 1.6466529319070735e-06, | |
| "logits/chosen": -1.2726246118545532, | |
| "logits/rejected": -1.39580237865448, | |
| "logps/chosen": -3.7457852363586426, | |
| "logps/rejected": -5.324977397918701, | |
| "loss": 18.2434, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.2871127724647522, | |
| "rewards/margins": 0.11219409853219986, | |
| "rewards/rejected": -0.39930686354637146, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.8904971680302077, | |
| "grad_norm": 73.79737854003906, | |
| "learning_rate": 1.560467519403579e-06, | |
| "logits/chosen": -1.3266379833221436, | |
| "logits/rejected": -1.3948261737823486, | |
| "logps/chosen": -4.1067681312561035, | |
| "logps/rejected": -4.673392295837402, | |
| "loss": 22.1702, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.3159501254558563, | |
| "rewards/margins": 0.04971395805478096, | |
| "rewards/rejected": -0.3656640946865082, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.893643801132788, | |
| "grad_norm": 106.870361328125, | |
| "learning_rate": 1.4765070208444732e-06, | |
| "logits/chosen": -1.3216549158096313, | |
| "logits/rejected": -1.35343337059021, | |
| "logps/chosen": -4.343778133392334, | |
| "logps/rejected": -5.122066497802734, | |
| "loss": 22.7187, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.33430585265159607, | |
| "rewards/margins": 0.06294408440589905, | |
| "rewards/rejected": -0.3972499370574951, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.8967904342353682, | |
| "grad_norm": 62.6711311340332, | |
| "learning_rate": 1.3947815669033026e-06, | |
| "logits/chosen": -1.3594673871994019, | |
| "logits/rejected": -1.4739999771118164, | |
| "logps/chosen": -4.087611198425293, | |
| "logps/rejected": -5.339770317077637, | |
| "loss": 20.526, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.31806594133377075, | |
| "rewards/margins": 0.08883042633533478, | |
| "rewards/rejected": -0.40689635276794434, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.8999370673379484, | |
| "grad_norm": 98.1043930053711, | |
| "learning_rate": 1.3153010185731495e-06, | |
| "logits/chosen": -1.2508734464645386, | |
| "logits/rejected": -1.32900869846344, | |
| "logps/chosen": -4.235801696777344, | |
| "logps/rejected": -5.670529842376709, | |
| "loss": 20.3076, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.3296756148338318, | |
| "rewards/margins": 0.09636791795492172, | |
| "rewards/rejected": -0.4260435700416565, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.9030837004405287, | |
| "grad_norm": 87.73750305175781, | |
| "learning_rate": 1.2380749659767766e-06, | |
| "logits/chosen": -1.3343340158462524, | |
| "logits/rejected": -1.3880221843719482, | |
| "logps/chosen": -4.322578430175781, | |
| "logps/rejected": -5.371191501617432, | |
| "loss": 20.9961, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.33630794286727905, | |
| "rewards/margins": 0.0794602781534195, | |
| "rewards/rejected": -0.41576823592185974, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.9062303335431089, | |
| "grad_norm": 72.0036392211914, | |
| "learning_rate": 1.1631127272095077e-06, | |
| "logits/chosen": -1.3422092199325562, | |
| "logits/rejected": -1.4017739295959473, | |
| "logps/chosen": -3.97587251663208, | |
| "logps/rejected": -5.63102388381958, | |
| "loss": 18.4484, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.302670419216156, | |
| "rewards/margins": 0.1103433147072792, | |
| "rewards/rejected": -0.413013756275177, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.9093769666456891, | |
| "grad_norm": 55.72761917114258, | |
| "learning_rate": 1.0904233472148862e-06, | |
| "logits/chosen": -1.4325498342514038, | |
| "logits/rejected": -1.5191594362258911, | |
| "logps/chosen": -4.523946285247803, | |
| "logps/rejected": -5.913887023925781, | |
| "loss": 20.9945, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.34643903374671936, | |
| "rewards/margins": 0.07747067511081696, | |
| "rewards/rejected": -0.4239097237586975, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.9125235997482694, | |
| "grad_norm": 74.03398132324219, | |
| "learning_rate": 1.0200155966933333e-06, | |
| "logits/chosen": -1.3860814571380615, | |
| "logits/rejected": -1.4824600219726562, | |
| "logps/chosen": -4.180668830871582, | |
| "logps/rejected": -5.086295127868652, | |
| "loss": 22.6256, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.31416332721710205, | |
| "rewards/margins": 0.06807545572519302, | |
| "rewards/rejected": -0.3822387754917145, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.9156702328508496, | |
| "grad_norm": 55.17578887939453, | |
| "learning_rate": 9.51897971043847e-07, | |
| "logits/chosen": -1.277956485748291, | |
| "logits/rejected": -1.4699045419692993, | |
| "logps/chosen": -3.923815965652466, | |
| "logps/rejected": -5.776226997375488, | |
| "loss": 18.1837, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.30271369218826294, | |
| "rewards/margins": 0.13357527554035187, | |
| "rewards/rejected": -0.4362889230251312, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.9188168659534298, | |
| "grad_norm": 67.42135620117188, | |
| "learning_rate": 8.860786893389761e-07, | |
| "logits/chosen": -1.3501498699188232, | |
| "logits/rejected": -1.4162402153015137, | |
| "logps/chosen": -4.456291198730469, | |
| "logps/rejected": -4.891867637634277, | |
| "loss": 23.4746, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.35184237360954285, | |
| "rewards/margins": 0.03937570005655289, | |
| "rewards/rejected": -0.3912180960178375, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.92196349905601, | |
| "grad_norm": 86.8721923828125, | |
| "learning_rate": 8.225656933330972e-07, | |
| "logits/chosen": -1.396032691001892, | |
| "logits/rejected": -1.3607252836227417, | |
| "logps/chosen": -4.139504909515381, | |
| "logps/rejected": -5.256811618804932, | |
| "loss": 20.6197, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.31887346506118774, | |
| "rewards/margins": 0.08756524324417114, | |
| "rewards/rejected": -0.4064387381076813, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.9251101321585903, | |
| "grad_norm": 63.26131057739258, | |
| "learning_rate": 7.613666465041492e-07, | |
| "logits/chosen": -1.296687364578247, | |
| "logits/rejected": -1.338370442390442, | |
| "logps/chosen": -4.0869526863098145, | |
| "logps/rejected": -4.680004596710205, | |
| "loss": 22.3496, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.30148619413375854, | |
| "rewards/margins": 0.06435124576091766, | |
| "rewards/rejected": -0.365837424993515, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.9282567652611705, | |
| "grad_norm": 64.71456909179688, | |
| "learning_rate": 7.024889331289731e-07, | |
| "logits/chosen": -1.3576750755310059, | |
| "logits/rejected": -1.4629138708114624, | |
| "logps/chosen": -4.305732250213623, | |
| "logps/rejected": -6.287524700164795, | |
| "loss": 19.0147, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.3270648717880249, | |
| "rewards/margins": 0.12565208971500397, | |
| "rewards/rejected": -0.45271697640419006, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.9314033983637507, | |
| "grad_norm": 79.55664825439453, | |
| "learning_rate": 6.459396573923227e-07, | |
| "logits/chosen": -1.2750294208526611, | |
| "logits/rejected": -1.3182651996612549, | |
| "logps/chosen": -3.8780131340026855, | |
| "logps/rejected": -5.497721195220947, | |
| "loss": 19.3141, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.29957860708236694, | |
| "rewards/margins": 0.11124887317419052, | |
| "rewards/rejected": -0.41082748770713806, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.934550031466331, | |
| "grad_norm": 97.28962707519531, | |
| "learning_rate": 5.917256425296725e-07, | |
| "logits/chosen": -1.3326900005340576, | |
| "logits/rejected": -1.3848145008087158, | |
| "logps/chosen": -4.326709270477295, | |
| "logps/rejected": -5.8570427894592285, | |
| "loss": 17.956, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.32169783115386963, | |
| "rewards/margins": 0.11987517029047012, | |
| "rewards/rejected": -0.44157299399375916, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.9376966645689113, | |
| "grad_norm": 104.4383773803711, | |
| "learning_rate": 5.398534300039227e-07, | |
| "logits/chosen": -1.3669896125793457, | |
| "logits/rejected": -1.4102351665496826, | |
| "logps/chosen": -4.2153167724609375, | |
| "logps/rejected": -5.1999030113220215, | |
| "loss": 20.9588, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3310829997062683, | |
| "rewards/margins": 0.07336001843214035, | |
| "rewards/rejected": -0.40444302558898926, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.9408432976714916, | |
| "grad_norm": 59.6121826171875, | |
| "learning_rate": 4.903292787161129e-07, | |
| "logits/chosen": -1.4228112697601318, | |
| "logits/rejected": -1.528313159942627, | |
| "logps/chosen": -4.338911533355713, | |
| "logps/rejected": -5.048561096191406, | |
| "loss": 22.4697, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3458613455295563, | |
| "rewards/margins": 0.05565253645181656, | |
| "rewards/rejected": -0.40151387453079224, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.9439899307740718, | |
| "grad_norm": 134.8368377685547, | |
| "learning_rate": 4.4315916425021755e-07, | |
| "logits/chosen": -1.4706683158874512, | |
| "logits/rejected": -1.5189244747161865, | |
| "logps/chosen": -4.430064678192139, | |
| "logps/rejected": -4.881100177764893, | |
| "loss": 24.7599, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.34278133511543274, | |
| "rewards/margins": 0.03427756577730179, | |
| "rewards/rejected": -0.37705889344215393, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.947136563876652, | |
| "grad_norm": 75.44186401367188, | |
| "learning_rate": 3.983487781521311e-07, | |
| "logits/chosen": -1.3628993034362793, | |
| "logits/rejected": -1.5227676630020142, | |
| "logps/chosen": -4.508485317230225, | |
| "logps/rejected": -5.836249351501465, | |
| "loss": 21.4824, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.35081833600997925, | |
| "rewards/margins": 0.0795225128531456, | |
| "rewards/rejected": -0.43034085631370544, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.9502831969792322, | |
| "grad_norm": 53.86139678955078, | |
| "learning_rate": 3.5590352724293565e-07, | |
| "logits/chosen": -1.2814509868621826, | |
| "logits/rejected": -1.383336067199707, | |
| "logps/chosen": -3.697767972946167, | |
| "logps/rejected": -5.5374345779418945, | |
| "loss": 18.3089, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.28700894117355347, | |
| "rewards/margins": 0.12951508164405823, | |
| "rewards/rejected": -0.4165240228176117, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.9534298300818125, | |
| "grad_norm": 55.83627700805664, | |
| "learning_rate": 3.1582853296649785e-07, | |
| "logits/chosen": -1.3301982879638672, | |
| "logits/rejected": -1.4231036901474, | |
| "logps/chosen": -3.7521042823791504, | |
| "logps/rejected": -4.861963748931885, | |
| "loss": 19.3616, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.2891950309276581, | |
| "rewards/margins": 0.09701049327850342, | |
| "rewards/rejected": -0.3862055242061615, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.9565764631843927, | |
| "grad_norm": 88.61446380615234, | |
| "learning_rate": 2.7812863077153253e-07, | |
| "logits/chosen": -1.2899259328842163, | |
| "logits/rejected": -1.398050308227539, | |
| "logps/chosen": -4.068936824798584, | |
| "logps/rejected": -5.717960357666016, | |
| "loss": 17.8938, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.31747734546661377, | |
| "rewards/margins": 0.11797485500574112, | |
| "rewards/rejected": -0.4354521632194519, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.9597230962869729, | |
| "grad_norm": 58.96453857421875, | |
| "learning_rate": 2.4280836952814913e-07, | |
| "logits/chosen": -1.3611301183700562, | |
| "logits/rejected": -1.4117127656936646, | |
| "logps/chosen": -4.0526018142700195, | |
| "logps/rejected": -5.437824249267578, | |
| "loss": 21.3406, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.31340762972831726, | |
| "rewards/margins": 0.07557393610477448, | |
| "rewards/rejected": -0.38898158073425293, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.9628697293895532, | |
| "grad_norm": 82.22030639648438, | |
| "learning_rate": 2.0987201097897757e-07, | |
| "logits/chosen": -1.290305256843567, | |
| "logits/rejected": -1.3669493198394775, | |
| "logps/chosen": -4.012240409851074, | |
| "logps/rejected": -6.001503944396973, | |
| "loss": 18.4697, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.3110642433166504, | |
| "rewards/margins": 0.14227357506752014, | |
| "rewards/rejected": -0.45333781838417053, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.9660163624921334, | |
| "grad_norm": 69.16776275634766, | |
| "learning_rate": 1.7932352922496844e-07, | |
| "logits/chosen": -1.3238952159881592, | |
| "logits/rejected": -1.4009875059127808, | |
| "logps/chosen": -4.168734550476074, | |
| "logps/rejected": -5.520012855529785, | |
| "loss": 18.6757, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.3233780264854431, | |
| "rewards/margins": 0.10560549795627594, | |
| "rewards/rejected": -0.42898350954055786, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.9691629955947136, | |
| "grad_norm": 87.41554260253906, | |
| "learning_rate": 1.5116661024584756e-07, | |
| "logits/chosen": -1.3047425746917725, | |
| "logits/rejected": -1.2935268878936768, | |
| "logps/chosen": -3.8972859382629395, | |
| "logps/rejected": -5.7956743240356445, | |
| "loss": 19.4437, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.2976101040840149, | |
| "rewards/margins": 0.13567054271697998, | |
| "rewards/rejected": -0.4332806169986725, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.9723096286972939, | |
| "grad_norm": 129.71432495117188, | |
| "learning_rate": 1.254046514553986e-07, | |
| "logits/chosen": -1.3411355018615723, | |
| "logits/rejected": -1.3150873184204102, | |
| "logps/chosen": -4.793996334075928, | |
| "logps/rejected": -6.1579155921936035, | |
| "loss": 22.5465, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.36123085021972656, | |
| "rewards/margins": 0.08643898367881775, | |
| "rewards/rejected": -0.4476698338985443, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.9754562617998741, | |
| "grad_norm": 156.82296752929688, | |
| "learning_rate": 1.0204076129150198e-07, | |
| "logits/chosen": -1.3176259994506836, | |
| "logits/rejected": -1.371140956878662, | |
| "logps/chosen": -4.381787300109863, | |
| "logps/rejected": -5.822647571563721, | |
| "loss": 20.2445, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.322052001953125, | |
| "rewards/margins": 0.08496570587158203, | |
| "rewards/rejected": -0.40701770782470703, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.9786028949024543, | |
| "grad_norm": 101.22770690917969, | |
| "learning_rate": 8.107775884109048e-08, | |
| "logits/chosen": -1.377939224243164, | |
| "logits/rejected": -1.460756540298462, | |
| "logps/chosen": -4.821037292480469, | |
| "logps/rejected": -5.5621137619018555, | |
| "loss": 23.1685, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.37864041328430176, | |
| "rewards/margins": 0.05817138031125069, | |
| "rewards/rejected": -0.43681177496910095, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.9817495280050346, | |
| "grad_norm": 93.55181884765625, | |
| "learning_rate": 6.251817349998578e-08, | |
| "logits/chosen": -1.2559947967529297, | |
| "logits/rejected": -1.3171112537384033, | |
| "logps/chosen": -3.9931647777557373, | |
| "logps/rejected": -5.348459243774414, | |
| "loss": 22.9477, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.30862289667129517, | |
| "rewards/margins": 0.0842631608247757, | |
| "rewards/rejected": -0.39288607239723206, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.9848961611076148, | |
| "grad_norm": 80.63821411132812, | |
| "learning_rate": 4.636424466771372e-08, | |
| "logits/chosen": -1.24492347240448, | |
| "logits/rejected": -1.3349525928497314, | |
| "logps/chosen": -4.380553245544434, | |
| "logps/rejected": -5.421158313751221, | |
| "loss": 22.0329, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.34523719549179077, | |
| "rewards/margins": 0.07052381336688995, | |
| "rewards/rejected": -0.4157610535621643, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.9880427942101951, | |
| "grad_norm": 55.254642486572266, | |
| "learning_rate": 3.261792147728704e-08, | |
| "logits/chosen": -1.3501121997833252, | |
| "logits/rejected": -1.3522610664367676, | |
| "logps/chosen": -4.829428195953369, | |
| "logps/rejected": -5.480432033538818, | |
| "loss": 22.6751, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.3382914662361145, | |
| "rewards/margins": 0.05635923147201538, | |
| "rewards/rejected": -0.3946506381034851, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.9911894273127754, | |
| "grad_norm": 102.65123748779297, | |
| "learning_rate": 2.1280862560026927e-08, | |
| "logits/chosen": -1.350527048110962, | |
| "logits/rejected": -1.3495935201644897, | |
| "logps/chosen": -3.8183772563934326, | |
| "logps/rejected": -4.949650764465332, | |
| "loss": 22.3353, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3000851273536682, | |
| "rewards/margins": 0.07860491424798965, | |
| "rewards/rejected": -0.37869006395339966, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.9943360604153556, | |
| "grad_norm": 67.94386291503906, | |
| "learning_rate": 1.2354435845436385e-08, | |
| "logits/chosen": -1.2602336406707764, | |
| "logits/rejected": -1.2594802379608154, | |
| "logps/chosen": -3.5885491371154785, | |
| "logps/rejected": -4.909377098083496, | |
| "loss": 18.7801, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.27459749579429626, | |
| "rewards/margins": 0.10287781804800034, | |
| "rewards/rejected": -0.3774753212928772, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.9974826935179358, | |
| "grad_norm": 78.847412109375, | |
| "learning_rate": 5.8397183961411694e-09, | |
| "logits/chosen": -1.4188308715820312, | |
| "logits/rejected": -1.3911654949188232, | |
| "logps/chosen": -4.257325649261475, | |
| "logps/rejected": -5.559029579162598, | |
| "loss": 20.67, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.30969610810279846, | |
| "rewards/margins": 0.08111827820539474, | |
| "rewards/rejected": -0.3908143639564514, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 1589, | |
| "total_flos": 0.0, | |
| "train_loss": 22.009478435192264, | |
| "train_runtime": 23016.83, | |
| "train_samples_per_second": 1.105, | |
| "train_steps_per_second": 0.069 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1589, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |