| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 610, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.009842116054951815, |
| "grad_norm": 2.7811431884765625, |
| "learning_rate": 3.278688524590164e-08, |
| "logits/chosen": -2.533964157104492, |
| "logits/rejected": -2.5735201835632324, |
| "logps/chosen": -0.4404516816139221, |
| "logps/rejected": -0.44272005558013916, |
| "loss": 1.3164, |
| "rewards/accuracies": 0.4895833432674408, |
| "rewards/chosen": -0.8809033632278442, |
| "rewards/margins": 0.00453670509159565, |
| "rewards/rejected": -0.8854400515556335, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.01968423210990363, |
| "grad_norm": 3.193603992462158, |
| "learning_rate": 8.196721311475409e-08, |
| "logits/chosen": -2.5366404056549072, |
| "logits/rejected": -2.580451250076294, |
| "logps/chosen": -0.4687342345714569, |
| "logps/rejected": -0.4700230658054352, |
| "loss": 1.319, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.9374685287475586, |
| "rewards/margins": 0.002577731851488352, |
| "rewards/rejected": -0.9400461316108704, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.029526348164855443, |
| "grad_norm": 2.600137233734131, |
| "learning_rate": 1.3114754098360656e-07, |
| "logits/chosen": -2.457185983657837, |
| "logits/rejected": -2.439577579498291, |
| "logps/chosen": -0.4295300841331482, |
| "logps/rejected": -0.44163596630096436, |
| "loss": 1.3021, |
| "rewards/accuracies": 0.5260416865348816, |
| "rewards/chosen": -0.8590601682662964, |
| "rewards/margins": 0.024211766198277473, |
| "rewards/rejected": -0.8832719326019287, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.03936846421980726, |
| "grad_norm": 3.330892324447632, |
| "learning_rate": 1.80327868852459e-07, |
| "logits/chosen": -2.6398239135742188, |
| "logits/rejected": -2.584653615951538, |
| "logps/chosen": -0.4457446336746216, |
| "logps/rejected": -0.4714476466178894, |
| "loss": 1.283, |
| "rewards/accuracies": 0.5520833134651184, |
| "rewards/chosen": -0.8914893269538879, |
| "rewards/margins": 0.05140605568885803, |
| "rewards/rejected": -0.9428953528404236, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.04921058027475907, |
| "grad_norm": 3.791318416595459, |
| "learning_rate": 2.2950819672131146e-07, |
| "logits/chosen": -2.695323944091797, |
| "logits/rejected": -2.656032085418701, |
| "logps/chosen": -0.44835442304611206, |
| "logps/rejected": -0.45536863803863525, |
| "loss": 1.3095, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": -0.8967088460922241, |
| "rewards/margins": 0.0140285175293684, |
| "rewards/rejected": -0.9107372760772705, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.059052696329710885, |
| "grad_norm": 3.3407955169677734, |
| "learning_rate": 2.786885245901639e-07, |
| "logits/chosen": -2.5617423057556152, |
| "logits/rejected": -2.535289764404297, |
| "logps/chosen": -0.4351993799209595, |
| "logps/rejected": -0.4376446008682251, |
| "loss": 1.3161, |
| "rewards/accuracies": 0.4791666865348816, |
| "rewards/chosen": -0.870398759841919, |
| "rewards/margins": 0.004890482872724533, |
| "rewards/rejected": -0.8752892017364502, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0688948123846627, |
| "grad_norm": 2.501546621322632, |
| "learning_rate": 3.2786885245901637e-07, |
| "logits/chosen": -2.4582276344299316, |
| "logits/rejected": -2.462825059890747, |
| "logps/chosen": -0.4415985345840454, |
| "logps/rejected": -0.45250964164733887, |
| "loss": 1.3035, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -0.8831970691680908, |
| "rewards/margins": 0.021822253242135048, |
| "rewards/rejected": -0.9050193428993225, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.07873692843961452, |
| "grad_norm": 3.018726348876953, |
| "learning_rate": 3.770491803278688e-07, |
| "logits/chosen": -2.4941272735595703, |
| "logits/rejected": -2.504913568496704, |
| "logps/chosen": -0.4402480721473694, |
| "logps/rejected": -0.43463078141212463, |
| "loss": 1.3275, |
| "rewards/accuracies": 0.463541716337204, |
| "rewards/chosen": -0.8804961442947388, |
| "rewards/margins": -0.011234622448682785, |
| "rewards/rejected": -0.8692615032196045, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.08857904449456633, |
| "grad_norm": 2.763169527053833, |
| "learning_rate": 4.2622950819672127e-07, |
| "logits/chosen": -2.6146433353424072, |
| "logits/rejected": -2.6000146865844727, |
| "logps/chosen": -0.4407660961151123, |
| "logps/rejected": -0.44831183552742004, |
| "loss": 1.3084, |
| "rewards/accuracies": 0.5104166865348816, |
| "rewards/chosen": -0.8815321922302246, |
| "rewards/margins": 0.015091471374034882, |
| "rewards/rejected": -0.8966236114501953, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.09842116054951815, |
| "grad_norm": 2.839801073074341, |
| "learning_rate": 4.754098360655737e-07, |
| "logits/chosen": -2.450800895690918, |
| "logits/rejected": -2.3901357650756836, |
| "logps/chosen": -0.4461948275566101, |
| "logps/rejected": -0.4516681730747223, |
| "loss": 1.3124, |
| "rewards/accuracies": 0.5052083134651184, |
| "rewards/chosen": -0.8923896551132202, |
| "rewards/margins": 0.010946739464998245, |
| "rewards/rejected": -0.9033364057540894, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.10826327660446997, |
| "grad_norm": 3.3853325843811035, |
| "learning_rate": 5.245901639344262e-07, |
| "logits/chosen": -2.3206844329833984, |
| "logits/rejected": -2.391359329223633, |
| "logps/chosen": -0.4440017640590668, |
| "logps/rejected": -0.43253856897354126, |
| "loss": 1.3369, |
| "rewards/accuracies": 0.453125, |
| "rewards/chosen": -0.8880034685134888, |
| "rewards/margins": -0.022926393896341324, |
| "rewards/rejected": -0.8650770783424377, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.11810539265942177, |
| "grad_norm": 3.0044500827789307, |
| "learning_rate": 5.737704918032786e-07, |
| "logits/chosen": -2.3636105060577393, |
| "logits/rejected": -2.3779592514038086, |
| "logps/chosen": -0.44306662678718567, |
| "logps/rejected": -0.439554363489151, |
| "loss": 1.3247, |
| "rewards/accuracies": 0.4739583432674408, |
| "rewards/chosen": -0.8861331939697266, |
| "rewards/margins": -0.007024487480521202, |
| "rewards/rejected": -0.879108726978302, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.1279475087143736, |
| "grad_norm": 2.8635122776031494, |
| "learning_rate": 6.229508196721311e-07, |
| "logits/chosen": -2.499943733215332, |
| "logits/rejected": -2.465728282928467, |
| "logps/chosen": -0.44928643107414246, |
| "logps/rejected": -0.457317590713501, |
| "loss": 1.3083, |
| "rewards/accuracies": 0.5364583730697632, |
| "rewards/chosen": -0.8985728621482849, |
| "rewards/margins": 0.016062280163168907, |
| "rewards/rejected": -0.914635181427002, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.1377896247693254, |
| "grad_norm": 2.9235432147979736, |
| "learning_rate": 6.721311475409835e-07, |
| "logits/chosen": -2.313934326171875, |
| "logits/rejected": -2.4113597869873047, |
| "logps/chosen": -0.43200892210006714, |
| "logps/rejected": -0.4239245653152466, |
| "loss": 1.3305, |
| "rewards/accuracies": 0.4322916865348816, |
| "rewards/chosen": -0.8640178442001343, |
| "rewards/margins": -0.01616874523460865, |
| "rewards/rejected": -0.8478491306304932, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.1476317408242772, |
| "grad_norm": 2.583570957183838, |
| "learning_rate": 7.21311475409836e-07, |
| "logits/chosen": -2.4851250648498535, |
| "logits/rejected": -2.4660589694976807, |
| "logps/chosen": -0.42572450637817383, |
| "logps/rejected": -0.4340115785598755, |
| "loss": 1.3069, |
| "rewards/accuracies": 0.5052083730697632, |
| "rewards/chosen": -0.8514490723609924, |
| "rewards/margins": 0.016574125736951828, |
| "rewards/rejected": -0.868023157119751, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.15747385687922905, |
| "grad_norm": 2.7927021980285645, |
| "learning_rate": 7.704918032786884e-07, |
| "logits/chosen": -2.5673110485076904, |
| "logits/rejected": -2.5053553581237793, |
| "logps/chosen": -0.42316704988479614, |
| "logps/rejected": -0.41728323698043823, |
| "loss": 1.3275, |
| "rewards/accuracies": 0.4427083432674408, |
| "rewards/chosen": -0.8463341593742371, |
| "rewards/margins": -0.011767696589231491, |
| "rewards/rejected": -0.8345664739608765, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.16731597293418085, |
| "grad_norm": 2.707350730895996, |
| "learning_rate": 8.196721311475409e-07, |
| "logits/chosen": -2.493770122528076, |
| "logits/rejected": -2.497191905975342, |
| "logps/chosen": -0.413898766040802, |
| "logps/rejected": -0.42288342118263245, |
| "loss": 1.3051, |
| "rewards/accuracies": 0.5260416865348816, |
| "rewards/chosen": -0.8277975916862488, |
| "rewards/margins": 0.01796923577785492, |
| "rewards/rejected": -0.8457668423652649, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.17715808898913266, |
| "grad_norm": 2.8383336067199707, |
| "learning_rate": 8.688524590163933e-07, |
| "logits/chosen": -2.3650898933410645, |
| "logits/rejected": -2.4355216026306152, |
| "logps/chosen": -0.41062265634536743, |
| "logps/rejected": -0.40795671939849854, |
| "loss": 1.3217, |
| "rewards/accuracies": 0.4739583432674408, |
| "rewards/chosen": -0.8212453126907349, |
| "rewards/margins": -0.005331846419721842, |
| "rewards/rejected": -0.8159134387969971, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.1870002050440845, |
| "grad_norm": 3.264463424682617, |
| "learning_rate": 9.180327868852458e-07, |
| "logits/chosen": -2.3384056091308594, |
| "logits/rejected": -2.3722102642059326, |
| "logps/chosen": -0.415290892124176, |
| "logps/rejected": -0.4106101989746094, |
| "loss": 1.3245, |
| "rewards/accuracies": 0.4687500298023224, |
| "rewards/chosen": -0.830581784248352, |
| "rewards/margins": -0.009361350908875465, |
| "rewards/rejected": -0.8212203979492188, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.1968423210990363, |
| "grad_norm": 2.702704429626465, |
| "learning_rate": 9.672131147540984e-07, |
| "logits/chosen": -2.6796839237213135, |
| "logits/rejected": -2.70339298248291, |
| "logps/chosen": -0.38649609684944153, |
| "logps/rejected": -0.389847993850708, |
| "loss": 1.3119, |
| "rewards/accuracies": 0.5208333730697632, |
| "rewards/chosen": -0.7729922533035278, |
| "rewards/margins": 0.0067038037814199924, |
| "rewards/rejected": -0.7796960473060608, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2066844371539881, |
| "grad_norm": 3.295572519302368, |
| "learning_rate": 9.981785063752275e-07, |
| "logits/chosen": -2.549654483795166, |
| "logits/rejected": -2.5630557537078857, |
| "logps/chosen": -0.3998144268989563, |
| "logps/rejected": -0.4005410671234131, |
| "loss": 1.3159, |
| "rewards/accuracies": 0.4583333432674408, |
| "rewards/chosen": -0.7996287941932678, |
| "rewards/margins": 0.0014533549547195435, |
| "rewards/rejected": -0.8010821342468262, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.21652655320893993, |
| "grad_norm": 3.1199636459350586, |
| "learning_rate": 9.927140255009107e-07, |
| "logits/chosen": -2.619666337966919, |
| "logits/rejected": -2.6088433265686035, |
| "logps/chosen": -0.3825004994869232, |
| "logps/rejected": -0.3724328875541687, |
| "loss": 1.3313, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.7650009393692017, |
| "rewards/margins": -0.020135192200541496, |
| "rewards/rejected": -0.7448657751083374, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.22636866926389174, |
| "grad_norm": 2.7287650108337402, |
| "learning_rate": 9.872495446265937e-07, |
| "logits/chosen": -2.1145222187042236, |
| "logits/rejected": -2.1984646320343018, |
| "logps/chosen": -0.3847331702709198, |
| "logps/rejected": -0.3833593726158142, |
| "loss": 1.318, |
| "rewards/accuracies": 0.4739583730697632, |
| "rewards/chosen": -0.7694664001464844, |
| "rewards/margins": -0.0027476283721625805, |
| "rewards/rejected": -0.7667187452316284, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.23621078531884354, |
| "grad_norm": 3.2083938121795654, |
| "learning_rate": 9.817850637522768e-07, |
| "logits/chosen": -2.5148842334747314, |
| "logits/rejected": -2.515829086303711, |
| "logps/chosen": -0.37398943305015564, |
| "logps/rejected": -0.37285110354423523, |
| "loss": 1.3179, |
| "rewards/accuracies": 0.4843750298023224, |
| "rewards/chosen": -0.7479788661003113, |
| "rewards/margins": -0.0022766790352761745, |
| "rewards/rejected": -0.7457021474838257, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.24605290137379537, |
| "grad_norm": 2.843623638153076, |
| "learning_rate": 9.7632058287796e-07, |
| "logits/chosen": -2.1537227630615234, |
| "logits/rejected": -2.186638355255127, |
| "logps/chosen": -0.3700721561908722, |
| "logps/rejected": -0.3716978430747986, |
| "loss": 1.3131, |
| "rewards/accuracies": 0.5520833730697632, |
| "rewards/chosen": -0.7401443123817444, |
| "rewards/margins": 0.0032513481564819813, |
| "rewards/rejected": -0.7433956861495972, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.2558950174287472, |
| "grad_norm": 2.9954681396484375, |
| "learning_rate": 9.70856102003643e-07, |
| "logits/chosen": -2.37508487701416, |
| "logits/rejected": -2.3783116340637207, |
| "logps/chosen": -0.3550634980201721, |
| "logps/rejected": -0.36521193385124207, |
| "loss": 1.3005, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.7101269960403442, |
| "rewards/margins": 0.020296888425946236, |
| "rewards/rejected": -0.7304238677024841, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.265737133483699, |
| "grad_norm": 2.9920599460601807, |
| "learning_rate": 9.65391621129326e-07, |
| "logits/chosen": -2.3454084396362305, |
| "logits/rejected": -2.2591981887817383, |
| "logps/chosen": -0.3407334089279175, |
| "logps/rejected": -0.3508540987968445, |
| "loss": 1.3001, |
| "rewards/accuracies": 0.6041666865348816, |
| "rewards/chosen": -0.6814668774604797, |
| "rewards/margins": 0.02024134062230587, |
| "rewards/rejected": -0.701708197593689, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.2755792495386508, |
| "grad_norm": 2.8075363636016846, |
| "learning_rate": 9.599271402550091e-07, |
| "logits/chosen": -2.3530383110046387, |
| "logits/rejected": -2.3570096492767334, |
| "logps/chosen": -0.3530980944633484, |
| "logps/rejected": -0.34843918681144714, |
| "loss": 1.3219, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": -0.7061961889266968, |
| "rewards/margins": -0.009317765012383461, |
| "rewards/rejected": -0.6968783736228943, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.28542136559360265, |
| "grad_norm": 3.062509536743164, |
| "learning_rate": 9.54462659380692e-07, |
| "logits/chosen": -2.3423635959625244, |
| "logits/rejected": -2.3681883811950684, |
| "logps/chosen": -0.3502524793148041, |
| "logps/rejected": -0.3471028804779053, |
| "loss": 1.3204, |
| "rewards/accuracies": 0.4895833432674408, |
| "rewards/chosen": -0.7005049586296082, |
| "rewards/margins": -0.006299168802797794, |
| "rewards/rejected": -0.6942057013511658, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.2952634816485544, |
| "grad_norm": 2.8204712867736816, |
| "learning_rate": 9.489981785063752e-07, |
| "logits/chosen": -2.2016220092773438, |
| "logits/rejected": -2.176506996154785, |
| "logps/chosen": -0.32860067486763, |
| "logps/rejected": -0.3329722583293915, |
| "loss": 1.3082, |
| "rewards/accuracies": 0.5677083730697632, |
| "rewards/chosen": -0.6572014093399048, |
| "rewards/margins": 0.00874313898384571, |
| "rewards/rejected": -0.665944516658783, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.30510559770350626, |
| "grad_norm": 2.8776941299438477, |
| "learning_rate": 9.435336976320582e-07, |
| "logits/chosen": -2.272819995880127, |
| "logits/rejected": -2.2633821964263916, |
| "logps/chosen": -0.33269160985946655, |
| "logps/rejected": -0.3359033465385437, |
| "loss": 1.3102, |
| "rewards/accuracies": 0.5104166865348816, |
| "rewards/chosen": -0.6653832197189331, |
| "rewards/margins": 0.006423423532396555, |
| "rewards/rejected": -0.6718066930770874, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.3149477137584581, |
| "grad_norm": 2.593137264251709, |
| "learning_rate": 9.380692167577413e-07, |
| "logits/chosen": -2.213297128677368, |
| "logits/rejected": -2.2387821674346924, |
| "logps/chosen": -0.3308699429035187, |
| "logps/rejected": -0.3250887393951416, |
| "loss": 1.3232, |
| "rewards/accuracies": 0.4739583730697632, |
| "rewards/chosen": -0.6617398262023926, |
| "rewards/margins": -0.011562440544366837, |
| "rewards/rejected": -0.6501774191856384, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.32478982981340987, |
| "grad_norm": 2.9904048442840576, |
| "learning_rate": 9.326047358834243e-07, |
| "logits/chosen": -2.3887205123901367, |
| "logits/rejected": -2.4502334594726562, |
| "logps/chosen": -0.3256734609603882, |
| "logps/rejected": -0.32411491870880127, |
| "loss": 1.3169, |
| "rewards/accuracies": 0.4635416865348816, |
| "rewards/chosen": -0.6513469219207764, |
| "rewards/margins": -0.0031170835718512535, |
| "rewards/rejected": -0.6482298970222473, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.3346319458683617, |
| "grad_norm": 2.659723997116089, |
| "learning_rate": 9.271402550091074e-07, |
| "logits/chosen": -2.0707240104675293, |
| "logits/rejected": -2.098159074783325, |
| "logps/chosen": -0.3261147737503052, |
| "logps/rejected": -0.32286298274993896, |
| "loss": 1.3197, |
| "rewards/accuracies": 0.4895833730697632, |
| "rewards/chosen": -0.6522295475006104, |
| "rewards/margins": -0.006503552198410034, |
| "rewards/rejected": -0.6457259654998779, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.34447406192331353, |
| "grad_norm": 2.617114782333374, |
| "learning_rate": 9.216757741347905e-07, |
| "logits/chosen": -2.3909406661987305, |
| "logits/rejected": -2.2843799591064453, |
| "logps/chosen": -0.31216752529144287, |
| "logps/rejected": -0.31390637159347534, |
| "loss": 1.3118, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": -0.624334990978241, |
| "rewards/margins": 0.0034777685068547726, |
| "rewards/rejected": -0.6278128027915955, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3543161779782653, |
| "grad_norm": 2.900043249130249, |
| "learning_rate": 9.162112932604735e-07, |
| "logits/chosen": -2.1978025436401367, |
| "logits/rejected": -2.2727980613708496, |
| "logps/chosen": -0.3117530345916748, |
| "logps/rejected": -0.30951735377311707, |
| "loss": 1.3177, |
| "rewards/accuracies": 0.479166716337204, |
| "rewards/chosen": -0.6235060691833496, |
| "rewards/margins": -0.004471416585147381, |
| "rewards/rejected": -0.6190346479415894, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.36415829403321714, |
| "grad_norm": 2.785658359527588, |
| "learning_rate": 9.107468123861566e-07, |
| "logits/chosen": -2.2299718856811523, |
| "logits/rejected": -2.2097911834716797, |
| "logps/chosen": -0.3144725561141968, |
| "logps/rejected": -0.31601589918136597, |
| "loss": 1.3121, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": -0.6289451122283936, |
| "rewards/margins": 0.003086656332015991, |
| "rewards/rejected": -0.6320317983627319, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.374000410088169, |
| "grad_norm": 3.3421730995178223, |
| "learning_rate": 9.052823315118397e-07, |
| "logits/chosen": -2.1038126945495605, |
| "logits/rejected": -2.0839600563049316, |
| "logps/chosen": -0.3226383328437805, |
| "logps/rejected": -0.32167741656303406, |
| "loss": 1.3162, |
| "rewards/accuracies": 0.4791666865348816, |
| "rewards/chosen": -0.645276665687561, |
| "rewards/margins": -0.0019218978704884648, |
| "rewards/rejected": -0.6433548331260681, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.38384252614312075, |
| "grad_norm": 2.9325544834136963, |
| "learning_rate": 8.998178506375227e-07, |
| "logits/chosen": -2.20025634765625, |
| "logits/rejected": -2.2536721229553223, |
| "logps/chosen": -0.311615526676178, |
| "logps/rejected": -0.31010982394218445, |
| "loss": 1.3166, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.623231053352356, |
| "rewards/margins": -0.0030114587862044573, |
| "rewards/rejected": -0.6202195882797241, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.3936846421980726, |
| "grad_norm": 2.7917957305908203, |
| "learning_rate": 8.943533697632057e-07, |
| "logits/chosen": -2.3032007217407227, |
| "logits/rejected": -2.2467172145843506, |
| "logps/chosen": -0.31110310554504395, |
| "logps/rejected": -0.30983883142471313, |
| "loss": 1.316, |
| "rewards/accuracies": 0.4531250298023224, |
| "rewards/chosen": -0.6222062110900879, |
| "rewards/margins": -0.002528547076508403, |
| "rewards/rejected": -0.6196776628494263, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4035267582530244, |
| "grad_norm": 2.7737972736358643, |
| "learning_rate": 8.888888888888888e-07, |
| "logits/chosen": -2.2247915267944336, |
| "logits/rejected": -2.185898542404175, |
| "logps/chosen": -0.30768126249313354, |
| "logps/rejected": -0.31419891119003296, |
| "loss": 1.3047, |
| "rewards/accuracies": 0.5416666865348816, |
| "rewards/chosen": -0.6153625249862671, |
| "rewards/margins": 0.013035254552960396, |
| "rewards/rejected": -0.6283978223800659, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.4133688743079762, |
| "grad_norm": 2.8648312091827393, |
| "learning_rate": 8.834244080145718e-07, |
| "logits/chosen": -2.2792012691497803, |
| "logits/rejected": -2.2944419384002686, |
| "logps/chosen": -0.3053058981895447, |
| "logps/rejected": -0.30649513006210327, |
| "loss": 1.3127, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.6106117963790894, |
| "rewards/margins": 0.0023784590885043144, |
| "rewards/rejected": -0.6129902601242065, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.42321099036292803, |
| "grad_norm": 3.244568109512329, |
| "learning_rate": 8.77959927140255e-07, |
| "logits/chosen": -2.3420162200927734, |
| "logits/rejected": -2.3962247371673584, |
| "logps/chosen": -0.29792097210884094, |
| "logps/rejected": -0.3045887351036072, |
| "loss": 1.3048, |
| "rewards/accuracies": 0.5572916865348816, |
| "rewards/chosen": -0.5958418846130371, |
| "rewards/margins": 0.01333555020391941, |
| "rewards/rejected": -0.6091774702072144, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.43305310641787986, |
| "grad_norm": 3.226536989212036, |
| "learning_rate": 8.724954462659381e-07, |
| "logits/chosen": -2.076824188232422, |
| "logits/rejected": -2.115086793899536, |
| "logps/chosen": -0.30216705799102783, |
| "logps/rejected": -0.3021796941757202, |
| "loss": 1.3144, |
| "rewards/accuracies": 0.4479166865348816, |
| "rewards/chosen": -0.6043341159820557, |
| "rewards/margins": 2.5328248739242554e-05, |
| "rewards/rejected": -0.6043593883514404, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.44289522247283164, |
| "grad_norm": 2.785240411758423, |
| "learning_rate": 8.670309653916211e-07, |
| "logits/chosen": -2.142348289489746, |
| "logits/rejected": -2.162036418914795, |
| "logps/chosen": -0.3002782166004181, |
| "logps/rejected": -0.29516181349754333, |
| "loss": 1.3219, |
| "rewards/accuracies": 0.4843750298023224, |
| "rewards/chosen": -0.6005564332008362, |
| "rewards/margins": -0.010232776403427124, |
| "rewards/rejected": -0.5903236269950867, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.4527373385277835, |
| "grad_norm": 2.65671968460083, |
| "learning_rate": 8.615664845173042e-07, |
| "logits/chosen": -2.1252198219299316, |
| "logits/rejected": -2.1430177688598633, |
| "logps/chosen": -0.3017180562019348, |
| "logps/rejected": -0.31176209449768066, |
| "loss": 1.2998, |
| "rewards/accuracies": 0.5625000596046448, |
| "rewards/chosen": -0.6034361124038696, |
| "rewards/margins": 0.02008809708058834, |
| "rewards/rejected": -0.6235241889953613, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.4625794545827353, |
| "grad_norm": 2.574594020843506, |
| "learning_rate": 8.561020036429873e-07, |
| "logits/chosen": -2.204986095428467, |
| "logits/rejected": -2.273808002471924, |
| "logps/chosen": -0.3037709593772888, |
| "logps/rejected": -0.3029387295246124, |
| "loss": 1.3155, |
| "rewards/accuracies": 0.4947916865348816, |
| "rewards/chosen": -0.6075419187545776, |
| "rewards/margins": -0.0016644850838929415, |
| "rewards/rejected": -0.6058773994445801, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.4724215706376871, |
| "grad_norm": 3.181304931640625, |
| "learning_rate": 8.506375227686703e-07, |
| "logits/chosen": -2.242096424102783, |
| "logits/rejected": -2.195438861846924, |
| "logps/chosen": -0.30111944675445557, |
| "logps/rejected": -0.3043938875198364, |
| "loss": 1.3097, |
| "rewards/accuracies": 0.5104167461395264, |
| "rewards/chosen": -0.6022388935089111, |
| "rewards/margins": 0.00654886569827795, |
| "rewards/rejected": -0.6087877750396729, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.4822636866926389, |
| "grad_norm": 3.2186405658721924, |
| "learning_rate": 8.451730418943533e-07, |
| "logits/chosen": -2.217180013656616, |
| "logits/rejected": -2.3594436645507812, |
| "logps/chosen": -0.3065149486064911, |
| "logps/rejected": -0.3052757978439331, |
| "loss": 1.3163, |
| "rewards/accuracies": 0.5208333730697632, |
| "rewards/chosen": -0.6130298972129822, |
| "rewards/margins": -0.002478264272212982, |
| "rewards/rejected": -0.6105515956878662, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.49210580274759075, |
| "grad_norm": 3.3180651664733887, |
| "learning_rate": 8.397085610200364e-07, |
| "logits/chosen": -2.209411859512329, |
| "logits/rejected": -2.153970718383789, |
| "logps/chosen": -0.30137962102890015, |
| "logps/rejected": -0.3014276325702667, |
| "loss": 1.3146, |
| "rewards/accuracies": 0.5260417461395264, |
| "rewards/chosen": -0.6027592420578003, |
| "rewards/margins": 9.602296631783247e-05, |
| "rewards/rejected": -0.6028553247451782, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5019479188025425, |
| "grad_norm": 2.8915350437164307, |
| "learning_rate": 8.342440801457194e-07, |
| "logits/chosen": -2.2405731678009033, |
| "logits/rejected": -2.197042465209961, |
| "logps/chosen": -0.2927592694759369, |
| "logps/rejected": -0.2949146032333374, |
| "loss": 1.3112, |
| "rewards/accuracies": 0.5833333730697632, |
| "rewards/chosen": -0.5855185985565186, |
| "rewards/margins": 0.0043106647208333015, |
| "rewards/rejected": -0.5898292064666748, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.5117900348574944, |
| "grad_norm": 3.5988616943359375, |
| "learning_rate": 8.287795992714025e-07, |
| "logits/chosen": -2.556368112564087, |
| "logits/rejected": -2.5539543628692627, |
| "logps/chosen": -0.29329127073287964, |
| "logps/rejected": -0.30116719007492065, |
| "loss": 1.3027, |
| "rewards/accuracies": 0.5364583730697632, |
| "rewards/chosen": -0.5865825414657593, |
| "rewards/margins": 0.015751861035823822, |
| "rewards/rejected": -0.6023343801498413, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.5216321509124462, |
| "grad_norm": 3.1660470962524414, |
| "learning_rate": 8.233151183970856e-07, |
| "logits/chosen": -2.296084403991699, |
| "logits/rejected": -2.319601058959961, |
| "logps/chosen": -0.29829874634742737, |
| "logps/rejected": -0.3049090504646301, |
| "loss": 1.3047, |
| "rewards/accuracies": 0.5208333730697632, |
| "rewards/chosen": -0.5965974926948547, |
| "rewards/margins": 0.013220642693340778, |
| "rewards/rejected": -0.6098181009292603, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.531474266967398, |
| "grad_norm": 3.3348488807678223, |
| "learning_rate": 8.178506375227686e-07, |
| "logits/chosen": -2.1131765842437744, |
| "logits/rejected": -2.0808582305908203, |
| "logps/chosen": -0.3017235994338989, |
| "logps/rejected": -0.30382847785949707, |
| "loss": 1.3113, |
| "rewards/accuracies": 0.5104166865348816, |
| "rewards/chosen": -0.6034471988677979, |
| "rewards/margins": 0.004209776874631643, |
| "rewards/rejected": -0.6076569557189941, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.5413163830223499, |
| "grad_norm": 3.311103582382202, |
| "learning_rate": 8.123861566484517e-07, |
| "logits/chosen": -2.4615557193756104, |
| "logits/rejected": -2.393066644668579, |
| "logps/chosen": -0.296355664730072, |
| "logps/rejected": -0.30595722794532776, |
| "loss": 1.3007, |
| "rewards/accuracies": 0.5677083730697632, |
| "rewards/chosen": -0.592711329460144, |
| "rewards/margins": 0.01920315995812416, |
| "rewards/rejected": -0.6119144558906555, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.5511584990773016, |
| "grad_norm": 3.7719385623931885, |
| "learning_rate": 8.069216757741348e-07, |
| "logits/chosen": -2.6202847957611084, |
| "logits/rejected": -2.6656908988952637, |
| "logps/chosen": -0.299972265958786, |
| "logps/rejected": -0.2976400554180145, |
| "loss": 1.318, |
| "rewards/accuracies": 0.5104166865348816, |
| "rewards/chosen": -0.5999445915222168, |
| "rewards/margins": -0.004664432257413864, |
| "rewards/rejected": -0.595280110836029, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.5610006151322534, |
| "grad_norm": 2.891984462738037, |
| "learning_rate": 8.014571948998177e-07, |
| "logits/chosen": -2.2948923110961914, |
| "logits/rejected": -2.3426403999328613, |
| "logps/chosen": -0.2913352847099304, |
| "logps/rejected": -0.29462340474128723, |
| "loss": 1.3094, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.5826705098152161, |
| "rewards/margins": 0.006576266605407, |
| "rewards/rejected": -0.5892468690872192, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.5708427311872053, |
| "grad_norm": 3.1071033477783203, |
| "learning_rate": 7.959927140255008e-07, |
| "logits/chosen": -2.54439115524292, |
| "logits/rejected": -2.484774112701416, |
| "logps/chosen": -0.2987141013145447, |
| "logps/rejected": -0.29878202080726624, |
| "loss": 1.3143, |
| "rewards/accuracies": 0.5052083730697632, |
| "rewards/chosen": -0.5974282026290894, |
| "rewards/margins": 0.0001358254812657833, |
| "rewards/rejected": -0.5975640416145325, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.5806848472421571, |
| "grad_norm": 3.294955253601074, |
| "learning_rate": 7.905282331511839e-07, |
| "logits/chosen": -2.5297629833221436, |
| "logits/rejected": -2.6556806564331055, |
| "logps/chosen": -0.2948570251464844, |
| "logps/rejected": -0.292624831199646, |
| "loss": 1.3182, |
| "rewards/accuracies": 0.4687500298023224, |
| "rewards/chosen": -0.5897141098976135, |
| "rewards/margins": -0.004464422352612019, |
| "rewards/rejected": -0.585249662399292, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.5905269632971089, |
| "grad_norm": 3.587242364883423, |
| "learning_rate": 7.850637522768669e-07, |
| "logits/chosen": -2.43650484085083, |
| "logits/rejected": -2.4716460704803467, |
| "logps/chosen": -0.3128420114517212, |
| "logps/rejected": -0.30596795678138733, |
| "loss": 1.3251, |
| "rewards/accuracies": 0.4739583730697632, |
| "rewards/chosen": -0.6256840229034424, |
| "rewards/margins": -0.01374807208776474, |
| "rewards/rejected": -0.6119359135627747, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6003690793520607, |
| "grad_norm": 3.2697906494140625, |
| "learning_rate": 7.795992714025501e-07, |
| "logits/chosen": -2.6053969860076904, |
| "logits/rejected": -2.605846881866455, |
| "logps/chosen": -0.30022186040878296, |
| "logps/rejected": -0.2978324294090271, |
| "loss": 1.3181, |
| "rewards/accuracies": 0.46875, |
| "rewards/chosen": -0.6004437208175659, |
| "rewards/margins": -0.004778880625963211, |
| "rewards/rejected": -0.5956648588180542, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.6102111954070125, |
| "grad_norm": 5.84366512298584, |
| "learning_rate": 7.741347905282332e-07, |
| "logits/chosen": -2.2874748706817627, |
| "logits/rejected": -2.3448486328125, |
| "logps/chosen": -0.2971286177635193, |
| "logps/rejected": -0.29523584246635437, |
| "loss": 1.3177, |
| "rewards/accuracies": 0.46875, |
| "rewards/chosen": -0.5942572355270386, |
| "rewards/margins": -0.0037855547852814198, |
| "rewards/rejected": -0.590471625328064, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.6200533114619643, |
| "grad_norm": 3.066098213195801, |
| "learning_rate": 7.686703096539162e-07, |
| "logits/chosen": -2.271486520767212, |
| "logits/rejected": -2.32065486907959, |
| "logps/chosen": -0.29515424370765686, |
| "logps/rejected": -0.30097514390945435, |
| "loss": 1.306, |
| "rewards/accuracies": 0.4895833730697632, |
| "rewards/chosen": -0.5903085470199585, |
| "rewards/margins": 0.011641697026789188, |
| "rewards/rejected": -0.6019502878189087, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.6298954275169162, |
| "grad_norm": 3.2268269062042236, |
| "learning_rate": 7.632058287795993e-07, |
| "logits/chosen": -2.565883159637451, |
| "logits/rejected": -2.644766330718994, |
| "logps/chosen": -0.28512534499168396, |
| "logps/rejected": -0.28749996423721313, |
| "loss": 1.3108, |
| "rewards/accuracies": 0.4895833730697632, |
| "rewards/chosen": -0.5702506899833679, |
| "rewards/margins": 0.004749252460896969, |
| "rewards/rejected": -0.5749999284744263, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.639737543571868, |
| "grad_norm": 3.53078031539917, |
| "learning_rate": 7.577413479052824e-07, |
| "logits/chosen": -2.412142515182495, |
| "logits/rejected": -2.4293620586395264, |
| "logps/chosen": -0.2922815978527069, |
| "logps/rejected": -0.2959037125110626, |
| "loss": 1.3091, |
| "rewards/accuracies": 0.5364583730697632, |
| "rewards/chosen": -0.5845631957054138, |
| "rewards/margins": 0.0072442456148564816, |
| "rewards/rejected": -0.5918074250221252, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.6495796596268197, |
| "grad_norm": 2.991565227508545, |
| "learning_rate": 7.522768670309653e-07, |
| "logits/chosen": -2.1828391551971436, |
| "logits/rejected": -2.2708098888397217, |
| "logps/chosen": -0.2814570367336273, |
| "logps/rejected": -0.2823142409324646, |
| "loss": 1.3132, |
| "rewards/accuracies": 0.479166716337204, |
| "rewards/chosen": -0.5629140734672546, |
| "rewards/margins": 0.001714351586997509, |
| "rewards/rejected": -0.5646284222602844, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.6594217756817716, |
| "grad_norm": 3.2695488929748535, |
| "learning_rate": 7.468123861566484e-07, |
| "logits/chosen": -2.651029109954834, |
| "logits/rejected": -2.5657577514648438, |
| "logps/chosen": -0.2821059226989746, |
| "logps/rejected": -0.288613885641098, |
| "loss": 1.3049, |
| "rewards/accuracies": 0.5885416865348816, |
| "rewards/chosen": -0.564211905002594, |
| "rewards/margins": 0.013015862554311752, |
| "rewards/rejected": -0.577227771282196, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.6692638917367234, |
| "grad_norm": 3.027118444442749, |
| "learning_rate": 7.413479052823315e-07, |
| "logits/chosen": -2.4459362030029297, |
| "logits/rejected": -2.4375133514404297, |
| "logps/chosen": -0.2884170114994049, |
| "logps/rejected": -0.29369017481803894, |
| "loss": 1.3066, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -0.576833963394165, |
| "rewards/margins": 0.010546308942139149, |
| "rewards/rejected": -0.5873803496360779, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.6791060077916752, |
| "grad_norm": 3.216331720352173, |
| "learning_rate": 7.358834244080145e-07, |
| "logits/chosen": -2.2358179092407227, |
| "logits/rejected": -2.272609233856201, |
| "logps/chosen": -0.29196596145629883, |
| "logps/rejected": -0.29673081636428833, |
| "loss": 1.3078, |
| "rewards/accuracies": 0.5520833730697632, |
| "rewards/chosen": -0.5839319229125977, |
| "rewards/margins": 0.009529721923172474, |
| "rewards/rejected": -0.5934616327285767, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.6889481238466271, |
| "grad_norm": 3.1151554584503174, |
| "learning_rate": 7.304189435336976e-07, |
| "logits/chosen": -2.1243269443511963, |
| "logits/rejected": -2.25223708152771, |
| "logps/chosen": -0.28017833828926086, |
| "logps/rejected": -0.28699177503585815, |
| "loss": 1.3044, |
| "rewards/accuracies": 0.5729166865348816, |
| "rewards/chosen": -0.5603566765785217, |
| "rewards/margins": 0.013626816682517529, |
| "rewards/rejected": -0.5739835500717163, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6987902399015788, |
| "grad_norm": 2.807512044906616, |
| "learning_rate": 7.249544626593807e-07, |
| "logits/chosen": -2.404712677001953, |
| "logits/rejected": -2.342726945877075, |
| "logps/chosen": -0.274359792470932, |
| "logps/rejected": -0.27572181820869446, |
| "loss": 1.3124, |
| "rewards/accuracies": 0.494791716337204, |
| "rewards/chosen": -0.548719584941864, |
| "rewards/margins": 0.002724003978073597, |
| "rewards/rejected": -0.5514436364173889, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.7086323559565306, |
| "grad_norm": 2.903782606124878, |
| "learning_rate": 7.194899817850637e-07, |
| "logits/chosen": -2.2218027114868164, |
| "logits/rejected": -2.2185091972351074, |
| "logps/chosen": -0.2856960594654083, |
| "logps/rejected": -0.2866899073123932, |
| "loss": 1.3127, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.5713921189308167, |
| "rewards/margins": 0.001987707568332553, |
| "rewards/rejected": -0.5733798742294312, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.7184744720114825, |
| "grad_norm": 2.9293315410614014, |
| "learning_rate": 7.140255009107468e-07, |
| "logits/chosen": -2.249828338623047, |
| "logits/rejected": -2.340233087539673, |
| "logps/chosen": -0.2771129608154297, |
| "logps/rejected": -0.2712464928627014, |
| "loss": 1.3229, |
| "rewards/accuracies": 0.4687500298023224, |
| "rewards/chosen": -0.5542259216308594, |
| "rewards/margins": -0.011732938699424267, |
| "rewards/rejected": -0.5424929857254028, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.7283165880664343, |
| "grad_norm": 3.209792137145996, |
| "learning_rate": 7.085610200364299e-07, |
| "logits/chosen": -2.6383891105651855, |
| "logits/rejected": -2.5945024490356445, |
| "logps/chosen": -0.275699257850647, |
| "logps/rejected": -0.2788482904434204, |
| "loss": 1.3099, |
| "rewards/accuracies": 0.5104166865348816, |
| "rewards/chosen": -0.551398515701294, |
| "rewards/margins": 0.0062980144284665585, |
| "rewards/rejected": -0.5576965808868408, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.7381587041213861, |
| "grad_norm": 3.3083884716033936, |
| "learning_rate": 7.030965391621128e-07, |
| "logits/chosen": -2.1804957389831543, |
| "logits/rejected": -2.2220664024353027, |
| "logps/chosen": -0.2919140160083771, |
| "logps/rejected": -0.28688621520996094, |
| "loss": 1.3218, |
| "rewards/accuracies": 0.4739583432674408, |
| "rewards/chosen": -0.5838280320167542, |
| "rewards/margins": -0.01005559042096138, |
| "rewards/rejected": -0.5737724304199219, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.748000820176338, |
| "grad_norm": 3.429899215698242, |
| "learning_rate": 6.976320582877959e-07, |
| "logits/chosen": -2.5299549102783203, |
| "logits/rejected": -2.5133347511291504, |
| "logps/chosen": -0.27780574560165405, |
| "logps/rejected": -0.2840309739112854, |
| "loss": 1.3053, |
| "rewards/accuracies": 0.4635416865348816, |
| "rewards/chosen": -0.5556114912033081, |
| "rewards/margins": 0.01245046779513359, |
| "rewards/rejected": -0.568061888217926, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.7578429362312897, |
| "grad_norm": 3.071539878845215, |
| "learning_rate": 6.92167577413479e-07, |
| "logits/chosen": -2.312579393386841, |
| "logits/rejected": -2.415515422821045, |
| "logps/chosen": -0.27620574831962585, |
| "logps/rejected": -0.27957403659820557, |
| "loss": 1.3096, |
| "rewards/accuracies": 0.5364583730697632, |
| "rewards/chosen": -0.5524114966392517, |
| "rewards/margins": 0.006736626382917166, |
| "rewards/rejected": -0.5591480731964111, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.7676850522862415, |
| "grad_norm": 3.4358389377593994, |
| "learning_rate": 6.86703096539162e-07, |
| "logits/chosen": -2.452547550201416, |
| "logits/rejected": -2.4726529121398926, |
| "logps/chosen": -0.284848153591156, |
| "logps/rejected": -0.2895206809043884, |
| "loss": 1.3076, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -0.569696307182312, |
| "rewards/margins": 0.009345123544335365, |
| "rewards/rejected": -0.5790413618087769, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.7775271683411934, |
| "grad_norm": 3.199431896209717, |
| "learning_rate": 6.812386156648452e-07, |
| "logits/chosen": -2.2307732105255127, |
| "logits/rejected": -2.2812376022338867, |
| "logps/chosen": -0.29158759117126465, |
| "logps/rejected": -0.2949972152709961, |
| "loss": 1.3096, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.5831751823425293, |
| "rewards/margins": 0.006819295696914196, |
| "rewards/rejected": -0.5899944305419922, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.7873692843961452, |
| "grad_norm": 2.6213815212249756, |
| "learning_rate": 6.757741347905283e-07, |
| "logits/chosen": -2.1308844089508057, |
| "logits/rejected": -2.17814302444458, |
| "logps/chosen": -0.27422723174095154, |
| "logps/rejected": -0.2723295986652374, |
| "loss": 1.3171, |
| "rewards/accuracies": 0.5104166865348816, |
| "rewards/chosen": -0.5484545230865479, |
| "rewards/margins": -0.0037952661514282227, |
| "rewards/rejected": -0.5446591973304749, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.797211400451097, |
| "grad_norm": 2.6146914958953857, |
| "learning_rate": 6.703096539162113e-07, |
| "logits/chosen": -2.217012405395508, |
| "logits/rejected": -2.30794095993042, |
| "logps/chosen": -0.2751448452472687, |
| "logps/rejected": -0.2789701223373413, |
| "loss": 1.3088, |
| "rewards/accuracies": 0.5052083730697632, |
| "rewards/chosen": -0.5502896904945374, |
| "rewards/margins": 0.007650562096387148, |
| "rewards/rejected": -0.5579402446746826, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.8070535165060488, |
| "grad_norm": 3.266503095626831, |
| "learning_rate": 6.648451730418944e-07, |
| "logits/chosen": -2.3135907649993896, |
| "logits/rejected": -2.2477428913116455, |
| "logps/chosen": -0.2836703360080719, |
| "logps/rejected": -0.28257811069488525, |
| "loss": 1.3161, |
| "rewards/accuracies": 0.4739583730697632, |
| "rewards/chosen": -0.5673407316207886, |
| "rewards/margins": -0.002184514421969652, |
| "rewards/rejected": -0.5651561617851257, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.8168956325610006, |
| "grad_norm": 3.104182481765747, |
| "learning_rate": 6.593806921675775e-07, |
| "logits/chosen": -2.328064441680908, |
| "logits/rejected": -2.3774280548095703, |
| "logps/chosen": -0.276460200548172, |
| "logps/rejected": -0.2795277237892151, |
| "loss": 1.3097, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -0.552920401096344, |
| "rewards/margins": 0.006135078612715006, |
| "rewards/rejected": -0.559055507183075, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.8267377486159524, |
| "grad_norm": 3.145758867263794, |
| "learning_rate": 6.539162112932604e-07, |
| "logits/chosen": -2.0514845848083496, |
| "logits/rejected": -2.1181387901306152, |
| "logps/chosen": -0.2676684558391571, |
| "logps/rejected": -0.2710112929344177, |
| "loss": 1.3094, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.5353369116783142, |
| "rewards/margins": 0.006685652770102024, |
| "rewards/rejected": -0.5420225262641907, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.8365798646709043, |
| "grad_norm": 2.8558096885681152, |
| "learning_rate": 6.484517304189435e-07, |
| "logits/chosen": -2.133450984954834, |
| "logits/rejected": -2.174488067626953, |
| "logps/chosen": -0.2785017192363739, |
| "logps/rejected": -0.27986931800842285, |
| "loss": 1.3122, |
| "rewards/accuracies": 0.4895833730697632, |
| "rewards/chosen": -0.557003378868103, |
| "rewards/margins": 0.0027352517936378717, |
| "rewards/rejected": -0.5597386360168457, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.8464219807258561, |
| "grad_norm": 2.6255104541778564, |
| "learning_rate": 6.429872495446266e-07, |
| "logits/chosen": -2.120138168334961, |
| "logits/rejected": -2.167184829711914, |
| "logps/chosen": -0.2761051654815674, |
| "logps/rejected": -0.27183249592781067, |
| "loss": 1.3209, |
| "rewards/accuracies": 0.4843750298023224, |
| "rewards/chosen": -0.5522103309631348, |
| "rewards/margins": -0.008545313030481339, |
| "rewards/rejected": -0.5436649918556213, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.8562640967808078, |
| "grad_norm": 3.049882173538208, |
| "learning_rate": 6.375227686703096e-07, |
| "logits/chosen": -2.144340991973877, |
| "logits/rejected": -2.2074198722839355, |
| "logps/chosen": -0.28039246797561646, |
| "logps/rejected": -0.2806190848350525, |
| "loss": 1.314, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.5607849359512329, |
| "rewards/margins": 0.00045326724648475647, |
| "rewards/rejected": -0.5612382292747498, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.8661062128357597, |
| "grad_norm": 3.1524369716644287, |
| "learning_rate": 6.320582877959927e-07, |
| "logits/chosen": -2.2970197200775146, |
| "logits/rejected": -2.2662580013275146, |
| "logps/chosen": -0.2740846872329712, |
| "logps/rejected": -0.27690619230270386, |
| "loss": 1.3101, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -0.5481693744659424, |
| "rewards/margins": 0.005643073469400406, |
| "rewards/rejected": -0.5538123846054077, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.8759483288907115, |
| "grad_norm": 2.8429999351501465, |
| "learning_rate": 6.265938069216758e-07, |
| "logits/chosen": -2.36765456199646, |
| "logits/rejected": -2.348548650741577, |
| "logps/chosen": -0.2780293822288513, |
| "logps/rejected": -0.2812359631061554, |
| "loss": 1.3096, |
| "rewards/accuracies": 0.5104166865348816, |
| "rewards/chosen": -0.5560587644577026, |
| "rewards/margins": 0.00641320226714015, |
| "rewards/rejected": -0.5624719262123108, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.8857904449456633, |
| "grad_norm": 3.0595552921295166, |
| "learning_rate": 6.211293260473588e-07, |
| "logits/chosen": -2.509822368621826, |
| "logits/rejected": -2.49569034576416, |
| "logps/chosen": -0.2772996425628662, |
| "logps/rejected": -0.28799355030059814, |
| "loss": 1.299, |
| "rewards/accuracies": 0.5729166865348816, |
| "rewards/chosen": -0.5545992851257324, |
| "rewards/margins": 0.021387770771980286, |
| "rewards/rejected": -0.5759870409965515, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8956325610006152, |
| "grad_norm": 3.2352564334869385, |
| "learning_rate": 6.156648451730419e-07, |
| "logits/chosen": -2.1871042251586914, |
| "logits/rejected": -2.1954808235168457, |
| "logps/chosen": -0.27809447050094604, |
| "logps/rejected": -0.2834944427013397, |
| "loss": 1.3066, |
| "rewards/accuracies": 0.5364583730697632, |
| "rewards/chosen": -0.5561889410018921, |
| "rewards/margins": 0.010799943469464779, |
| "rewards/rejected": -0.5669888854026794, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.905474677055567, |
| "grad_norm": 2.931501865386963, |
| "learning_rate": 6.102003642987249e-07, |
| "logits/chosen": -2.225825309753418, |
| "logits/rejected": -2.2486400604248047, |
| "logps/chosen": -0.280958890914917, |
| "logps/rejected": -0.2846115827560425, |
| "loss": 1.3091, |
| "rewards/accuracies": 0.5052083730697632, |
| "rewards/chosen": -0.561917781829834, |
| "rewards/margins": 0.0073054153472185135, |
| "rewards/rejected": -0.569223165512085, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.9153167931105187, |
| "grad_norm": 3.286054849624634, |
| "learning_rate": 6.047358834244079e-07, |
| "logits/chosen": -2.196420669555664, |
| "logits/rejected": -2.27847957611084, |
| "logps/chosen": -0.2832242250442505, |
| "logps/rejected": -0.28931811451911926, |
| "loss": 1.306, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -0.5664485096931458, |
| "rewards/margins": 0.012187773361802101, |
| "rewards/rejected": -0.5786362290382385, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.9251589091654706, |
| "grad_norm": 2.9873507022857666, |
| "learning_rate": 5.99271402550091e-07, |
| "logits/chosen": -2.2308907508850098, |
| "logits/rejected": -2.211625576019287, |
| "logps/chosen": -0.28117528557777405, |
| "logps/rejected": -0.28368309140205383, |
| "loss": 1.3107, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.5623506307601929, |
| "rewards/margins": 0.005015634000301361, |
| "rewards/rejected": -0.5673662424087524, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.9350010252204224, |
| "grad_norm": 3.100102663040161, |
| "learning_rate": 5.93806921675774e-07, |
| "logits/chosen": -2.177976369857788, |
| "logits/rejected": -2.0842134952545166, |
| "logps/chosen": -0.27946293354034424, |
| "logps/rejected": -0.28198277950286865, |
| "loss": 1.3106, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.5589258670806885, |
| "rewards/margins": 0.005039653740823269, |
| "rewards/rejected": -0.5639654994010925, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.9448431412753742, |
| "grad_norm": 3.292073965072632, |
| "learning_rate": 5.883424408014571e-07, |
| "logits/chosen": -2.5255026817321777, |
| "logits/rejected": -2.5117194652557373, |
| "logps/chosen": -0.27470558881759644, |
| "logps/rejected": -0.2794868052005768, |
| "loss": 1.3077, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.5494111776351929, |
| "rewards/margins": 0.009562441147863865, |
| "rewards/rejected": -0.5589736104011536, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.954685257330326, |
| "grad_norm": 2.966644048690796, |
| "learning_rate": 5.828779599271403e-07, |
| "logits/chosen": -2.282688856124878, |
| "logits/rejected": -2.3302011489868164, |
| "logps/chosen": -0.281721293926239, |
| "logps/rejected": -0.2839483320713043, |
| "loss": 1.311, |
| "rewards/accuracies": 0.5052083134651184, |
| "rewards/chosen": -0.563442587852478, |
| "rewards/margins": 0.004454084672033787, |
| "rewards/rejected": -0.5678966641426086, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.9645273733852778, |
| "grad_norm": 3.1071043014526367, |
| "learning_rate": 5.774134790528234e-07, |
| "logits/chosen": -2.2296414375305176, |
| "logits/rejected": -2.2465267181396484, |
| "logps/chosen": -0.27823498845100403, |
| "logps/rejected": -0.27505937218666077, |
| "loss": 1.319, |
| "rewards/accuracies": 0.4895833730697632, |
| "rewards/chosen": -0.5564700365066528, |
| "rewards/margins": -0.006351290736347437, |
| "rewards/rejected": -0.5501187443733215, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.9743694894402296, |
| "grad_norm": 3.342630386352539, |
| "learning_rate": 5.719489981785064e-07, |
| "logits/chosen": -2.301790714263916, |
| "logits/rejected": -2.232879638671875, |
| "logps/chosen": -0.28276318311691284, |
| "logps/rejected": -0.2836660146713257, |
| "loss": 1.3133, |
| "rewards/accuracies": 0.5729166865348816, |
| "rewards/chosen": -0.5655263662338257, |
| "rewards/margins": 0.0018056412227451801, |
| "rewards/rejected": -0.5673320293426514, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.9842116054951815, |
| "grad_norm": 2.8772573471069336, |
| "learning_rate": 5.664845173041895e-07, |
| "logits/chosen": -2.7162556648254395, |
| "logits/rejected": -2.6399483680725098, |
| "logps/chosen": -0.276977002620697, |
| "logps/rejected": -0.27825504541397095, |
| "loss": 1.3131, |
| "rewards/accuracies": 0.4791666865348816, |
| "rewards/chosen": -0.553954005241394, |
| "rewards/margins": 0.00255610141903162, |
| "rewards/rejected": -0.5565100908279419, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9940537215501333, |
| "grad_norm": 2.8912346363067627, |
| "learning_rate": 5.610200364298725e-07, |
| "logits/chosen": -2.5115840435028076, |
| "logits/rejected": -2.4576642513275146, |
| "logps/chosen": -0.2784987688064575, |
| "logps/rejected": -0.2799062728881836, |
| "loss": 1.3124, |
| "rewards/accuracies": 0.5364583730697632, |
| "rewards/chosen": -0.556997537612915, |
| "rewards/margins": 0.00281504332087934, |
| "rewards/rejected": -0.5598125457763672, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.0032807053516506, |
| "grad_norm": 3.153007745742798, |
| "learning_rate": 5.555555555555555e-07, |
| "logits/chosen": -2.0730063915252686, |
| "logits/rejected": -2.148167371749878, |
| "logps/chosen": -0.28567373752593994, |
| "logps/rejected": -0.30247652530670166, |
| "loss": 1.21, |
| "rewards/accuracies": 0.6166666746139526, |
| "rewards/chosen": -0.5713474750518799, |
| "rewards/margins": 0.03360557556152344, |
| "rewards/rejected": -0.6049530506134033, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.0131228214066024, |
| "grad_norm": 3.5555901527404785, |
| "learning_rate": 5.500910746812386e-07, |
| "logits/chosen": -2.525221824645996, |
| "logits/rejected": -2.485888957977295, |
| "logps/chosen": -0.26666826009750366, |
| "logps/rejected": -0.29451531171798706, |
| "loss": 1.2742, |
| "rewards/accuracies": 0.6822916865348816, |
| "rewards/chosen": -0.5333365201950073, |
| "rewards/margins": 0.055694133043289185, |
| "rewards/rejected": -0.5890306234359741, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.0229649374615541, |
| "grad_norm": 3.351257085800171, |
| "learning_rate": 5.446265938069217e-07, |
| "logits/chosen": -2.073094129562378, |
| "logits/rejected": -2.082315444946289, |
| "logps/chosen": -0.2812093496322632, |
| "logps/rejected": -0.30162906646728516, |
| "loss": 1.2852, |
| "rewards/accuracies": 0.6197916865348816, |
| "rewards/chosen": -0.5624186992645264, |
| "rewards/margins": 0.04083945229649544, |
| "rewards/rejected": -0.6032581329345703, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.0328070535165061, |
| "grad_norm": 3.3620002269744873, |
| "learning_rate": 5.391621129326047e-07, |
| "logits/chosen": -2.327975034713745, |
| "logits/rejected": -2.2632720470428467, |
| "logps/chosen": -0.28332576155662537, |
| "logps/rejected": -0.30366650223731995, |
| "loss": 1.2851, |
| "rewards/accuracies": 0.6614583730697632, |
| "rewards/chosen": -0.5666515231132507, |
| "rewards/margins": 0.04068151116371155, |
| "rewards/rejected": -0.6073330044746399, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.042649169571458, |
| "grad_norm": 3.8589353561401367, |
| "learning_rate": 5.336976320582878e-07, |
| "logits/chosen": -2.3901515007019043, |
| "logits/rejected": -2.3333704471588135, |
| "logps/chosen": -0.27824974060058594, |
| "logps/rejected": -0.3028562366962433, |
| "loss": 1.2793, |
| "rewards/accuracies": 0.6510416865348816, |
| "rewards/chosen": -0.5564994215965271, |
| "rewards/margins": 0.04921308159828186, |
| "rewards/rejected": -0.6057125329971313, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.0524912856264097, |
| "grad_norm": 2.8481204509735107, |
| "learning_rate": 5.282331511839709e-07, |
| "logits/chosen": -2.478609323501587, |
| "logits/rejected": -2.4115850925445557, |
| "logps/chosen": -0.2729317545890808, |
| "logps/rejected": -0.2993568778038025, |
| "loss": 1.2768, |
| "rewards/accuracies": 0.6354166865348816, |
| "rewards/chosen": -0.5458635687828064, |
| "rewards/margins": 0.05285021662712097, |
| "rewards/rejected": -0.598713755607605, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.0623334016813615, |
| "grad_norm": 3.818915367126465, |
| "learning_rate": 5.227686703096539e-07, |
| "logits/chosen": -2.4622111320495605, |
| "logits/rejected": -2.5216636657714844, |
| "logps/chosen": -0.27243781089782715, |
| "logps/rejected": -0.3012939691543579, |
| "loss": 1.2731, |
| "rewards/accuracies": 0.6770833730697632, |
| "rewards/chosen": -0.5448756217956543, |
| "rewards/margins": 0.05771230533719063, |
| "rewards/rejected": -0.6025879383087158, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.0721755177363133, |
| "grad_norm": 4.67787504196167, |
| "learning_rate": 5.17304189435337e-07, |
| "logits/chosen": -2.3278470039367676, |
| "logits/rejected": -2.316049575805664, |
| "logps/chosen": -0.29923686385154724, |
| "logps/rejected": -0.3328157663345337, |
| "loss": 1.2673, |
| "rewards/accuracies": 0.6510417461395264, |
| "rewards/chosen": -0.5984737277030945, |
| "rewards/margins": 0.06715783476829529, |
| "rewards/rejected": -0.6656315326690674, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.082017633791265, |
| "grad_norm": 4.611724853515625, |
| "learning_rate": 5.118397085610199e-07, |
| "logits/chosen": -2.601393461227417, |
| "logits/rejected": -2.50719952583313, |
| "logps/chosen": -0.2986273169517517, |
| "logps/rejected": -0.3138871192932129, |
| "loss": 1.2934, |
| "rewards/accuracies": 0.5520833730697632, |
| "rewards/chosen": -0.5972546339035034, |
| "rewards/margins": 0.030519628897309303, |
| "rewards/rejected": -0.6277742385864258, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.091859749846217, |
| "grad_norm": 4.721919536590576, |
| "learning_rate": 5.06375227686703e-07, |
| "logits/chosen": -2.552699565887451, |
| "logits/rejected": -2.6258749961853027, |
| "logps/chosen": -0.28720822930336, |
| "logps/rejected": -0.3122625946998596, |
| "loss": 1.2794, |
| "rewards/accuracies": 0.6041666865348816, |
| "rewards/chosen": -0.5744165182113647, |
| "rewards/margins": 0.050108686089515686, |
| "rewards/rejected": -0.6245251893997192, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.1017018659011688, |
| "grad_norm": 5.033515453338623, |
| "learning_rate": 5.009107468123861e-07, |
| "logits/chosen": -2.828622817993164, |
| "logits/rejected": -2.8359427452087402, |
| "logps/chosen": -0.29961028695106506, |
| "logps/rejected": -0.3207009732723236, |
| "loss": 1.2851, |
| "rewards/accuracies": 0.5885416865348816, |
| "rewards/chosen": -0.5992205739021301, |
| "rewards/margins": 0.04218133166432381, |
| "rewards/rejected": -0.641402006149292, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.1115439819561206, |
| "grad_norm": 5.343788146972656, |
| "learning_rate": 4.954462659380693e-07, |
| "logits/chosen": -3.03733229637146, |
| "logits/rejected": -3.0399529933929443, |
| "logps/chosen": -0.30108141899108887, |
| "logps/rejected": -0.32776015996932983, |
| "loss": 1.2779, |
| "rewards/accuracies": 0.5885416865348816, |
| "rewards/chosen": -0.602162778377533, |
| "rewards/margins": 0.05335747450590134, |
| "rewards/rejected": -0.6555203199386597, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.1213860980110724, |
| "grad_norm": 5.3723602294921875, |
| "learning_rate": 4.899817850637522e-07, |
| "logits/chosen": -2.981997013092041, |
| "logits/rejected": -2.949530839920044, |
| "logps/chosen": -0.3047109544277191, |
| "logps/rejected": -0.33689817786216736, |
| "loss": 1.2696, |
| "rewards/accuracies": 0.6197917461395264, |
| "rewards/chosen": -0.6094219088554382, |
| "rewards/margins": 0.06437446177005768, |
| "rewards/rejected": -0.6737963557243347, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.1312282140660241, |
| "grad_norm": 4.727919101715088, |
| "learning_rate": 4.845173041894353e-07, |
| "logits/chosen": -2.695420980453491, |
| "logits/rejected": -2.6776719093322754, |
| "logps/chosen": -0.30032098293304443, |
| "logps/rejected": -0.3254402279853821, |
| "loss": 1.2794, |
| "rewards/accuracies": 0.5677083730697632, |
| "rewards/chosen": -0.6006419658660889, |
| "rewards/margins": 0.0502384789288044, |
| "rewards/rejected": -0.6508804559707642, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.141070330120976, |
| "grad_norm": 5.740645885467529, |
| "learning_rate": 4.790528233151183e-07, |
| "logits/chosen": -2.89811372756958, |
| "logits/rejected": -2.8297605514526367, |
| "logps/chosen": -0.3110088109970093, |
| "logps/rejected": -0.3469686210155487, |
| "loss": 1.2643, |
| "rewards/accuracies": 0.6822916865348816, |
| "rewards/chosen": -0.6220176219940186, |
| "rewards/margins": 0.07191960513591766, |
| "rewards/rejected": -0.6939372420310974, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.150912446175928, |
| "grad_norm": 5.677977561950684, |
| "learning_rate": 4.735883424408014e-07, |
| "logits/chosen": -2.919649362564087, |
| "logits/rejected": -2.7204208374023438, |
| "logps/chosen": -0.31897199153900146, |
| "logps/rejected": -0.3537505269050598, |
| "loss": 1.2661, |
| "rewards/accuracies": 0.6510416865348816, |
| "rewards/chosen": -0.6379439830780029, |
| "rewards/margins": 0.06955704838037491, |
| "rewards/rejected": -0.7075010538101196, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.1607545622308797, |
| "grad_norm": 5.252042293548584, |
| "learning_rate": 4.681238615664845e-07, |
| "logits/chosen": -2.69753360748291, |
| "logits/rejected": -2.699644088745117, |
| "logps/chosen": -0.3222702145576477, |
| "logps/rejected": -0.3525330424308777, |
| "loss": 1.2736, |
| "rewards/accuracies": 0.6145833730697632, |
| "rewards/chosen": -0.6445404291152954, |
| "rewards/margins": 0.06052564084529877, |
| "rewards/rejected": -0.7050661444664001, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.1705966782858315, |
| "grad_norm": 6.118551254272461, |
| "learning_rate": 4.6265938069216755e-07, |
| "logits/chosen": -2.956627368927002, |
| "logits/rejected": -2.8907008171081543, |
| "logps/chosen": -0.31340500712394714, |
| "logps/rejected": -0.3473677635192871, |
| "loss": 1.2679, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -0.6268100738525391, |
| "rewards/margins": 0.06792548298835754, |
| "rewards/rejected": -0.6947354674339294, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.1804387943407832, |
| "grad_norm": 6.526240348815918, |
| "learning_rate": 4.5719489981785067e-07, |
| "logits/chosen": -2.988039493560791, |
| "logits/rejected": -2.947343349456787, |
| "logps/chosen": -0.3325228691101074, |
| "logps/rejected": -0.3738601505756378, |
| "loss": 1.2573, |
| "rewards/accuracies": 0.6354166865348816, |
| "rewards/chosen": -0.6650457382202148, |
| "rewards/margins": 0.08267448842525482, |
| "rewards/rejected": -0.7477203011512756, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.190280910395735, |
| "grad_norm": 6.204545497894287, |
| "learning_rate": 4.517304189435337e-07, |
| "logits/chosen": -2.952932357788086, |
| "logits/rejected": -2.989872455596924, |
| "logps/chosen": -0.34380555152893066, |
| "logps/rejected": -0.37077945470809937, |
| "loss": 1.2777, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.6876111030578613, |
| "rewards/margins": 0.053947802633047104, |
| "rewards/rejected": -0.7415589094161987, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.2001230264506868, |
| "grad_norm": 9.464421272277832, |
| "learning_rate": 4.4626593806921675e-07, |
| "logits/chosen": -3.19895601272583, |
| "logits/rejected": -3.2810332775115967, |
| "logps/chosen": -0.3429660499095917, |
| "logps/rejected": -0.37942421436309814, |
| "loss": 1.2644, |
| "rewards/accuracies": 0.6354166865348816, |
| "rewards/chosen": -0.6859321594238281, |
| "rewards/margins": 0.07291623204946518, |
| "rewards/rejected": -0.7588483095169067, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.2099651425056388, |
| "grad_norm": 7.448987007141113, |
| "learning_rate": 4.408014571948998e-07, |
| "logits/chosen": -2.8745474815368652, |
| "logits/rejected": -2.9124350547790527, |
| "logps/chosen": -0.3625420033931732, |
| "logps/rejected": -0.40313583612442017, |
| "loss": 1.2592, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.7250840663909912, |
| "rewards/margins": 0.0811876431107521, |
| "rewards/rejected": -0.8062716722488403, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.2198072585605906, |
| "grad_norm": 8.745219230651855, |
| "learning_rate": 4.353369763205829e-07, |
| "logits/chosen": -3.0614566802978516, |
| "logits/rejected": -3.113219738006592, |
| "logps/chosen": -0.3707306683063507, |
| "logps/rejected": -0.40356966853141785, |
| "loss": 1.2707, |
| "rewards/accuracies": 0.6458333730697632, |
| "rewards/chosen": -0.7414613962173462, |
| "rewards/margins": 0.06567797809839249, |
| "rewards/rejected": -0.8071393966674805, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.2296493746155424, |
| "grad_norm": 6.997180461883545, |
| "learning_rate": 4.298724954462659e-07, |
| "logits/chosen": -2.9044456481933594, |
| "logits/rejected": -3.052482843399048, |
| "logps/chosen": -0.3646923005580902, |
| "logps/rejected": -0.39414650201797485, |
| "loss": 1.2752, |
| "rewards/accuracies": 0.6354166865348816, |
| "rewards/chosen": -0.7293846011161804, |
| "rewards/margins": 0.05890839919447899, |
| "rewards/rejected": -0.7882929444313049, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.2394914906704941, |
| "grad_norm": 10.55185317993164, |
| "learning_rate": 4.2440801457194896e-07, |
| "logits/chosen": -3.2242932319641113, |
| "logits/rejected": -3.0834662914276123, |
| "logps/chosen": -0.4069775640964508, |
| "logps/rejected": -0.463001012802124, |
| "loss": 1.2405, |
| "rewards/accuracies": 0.6354166865348816, |
| "rewards/chosen": -0.8139551281929016, |
| "rewards/margins": 0.11204691231250763, |
| "rewards/rejected": -0.9260020852088928, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.249333606725446, |
| "grad_norm": 20.299375534057617, |
| "learning_rate": 4.1894353369763203e-07, |
| "logits/chosen": -3.1378321647644043, |
| "logits/rejected": -3.108181953430176, |
| "logps/chosen": -0.40514567494392395, |
| "logps/rejected": -0.44149208068847656, |
| "loss": 1.2673, |
| "rewards/accuracies": 0.6458333730697632, |
| "rewards/chosen": -0.8102913498878479, |
| "rewards/margins": 0.07269280403852463, |
| "rewards/rejected": -0.8829841613769531, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.2591757227803977, |
| "grad_norm": 8.039090156555176, |
| "learning_rate": 4.134790528233151e-07, |
| "logits/chosen": -2.9066882133483887, |
| "logits/rejected": -2.9090116024017334, |
| "logps/chosen": -0.4023885130882263, |
| "logps/rejected": -0.44079360365867615, |
| "loss": 1.2646, |
| "rewards/accuracies": 0.5989583730697632, |
| "rewards/chosen": -0.8047770261764526, |
| "rewards/margins": 0.07681018859148026, |
| "rewards/rejected": -0.8815872073173523, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.2690178388353495, |
| "grad_norm": 9.490564346313477, |
| "learning_rate": 4.0801457194899816e-07, |
| "logits/chosen": -3.021369457244873, |
| "logits/rejected": -2.905494213104248, |
| "logps/chosen": -0.409457266330719, |
| "logps/rejected": -0.4580920338630676, |
| "loss": 1.2519, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.818914532661438, |
| "rewards/margins": 0.09726953506469727, |
| "rewards/rejected": -0.9161840677261353, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.2788599548903015, |
| "grad_norm": 9.488383293151855, |
| "learning_rate": 4.0255009107468123e-07, |
| "logits/chosen": -3.3218090534210205, |
| "logits/rejected": -3.2660317420959473, |
| "logps/chosen": -0.41564124822616577, |
| "logps/rejected": -0.43702036142349243, |
| "loss": 1.2879, |
| "rewards/accuracies": 0.6145833730697632, |
| "rewards/chosen": -0.8312824368476868, |
| "rewards/margins": 0.04275830462574959, |
| "rewards/rejected": -0.8740407228469849, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.2887020709452532, |
| "grad_norm": 9.484984397888184, |
| "learning_rate": 3.970856102003643e-07, |
| "logits/chosen": -3.089040756225586, |
| "logits/rejected": -3.155805826187134, |
| "logps/chosen": -0.4346025586128235, |
| "logps/rejected": -0.4728645384311676, |
| "loss": 1.2674, |
| "rewards/accuracies": 0.5729166865348816, |
| "rewards/chosen": -0.869205117225647, |
| "rewards/margins": 0.07652393728494644, |
| "rewards/rejected": -0.9457290768623352, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.298544187000205, |
| "grad_norm": 10.901666641235352, |
| "learning_rate": 3.9162112932604736e-07, |
| "logits/chosen": -2.9061741828918457, |
| "logits/rejected": -2.9783828258514404, |
| "logps/chosen": -0.4548156261444092, |
| "logps/rejected": -0.4865460991859436, |
| "loss": 1.2736, |
| "rewards/accuracies": 0.5885416865348816, |
| "rewards/chosen": -0.9096312522888184, |
| "rewards/margins": 0.06346089392900467, |
| "rewards/rejected": -0.9730921387672424, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.3083863030551568, |
| "grad_norm": 10.082768440246582, |
| "learning_rate": 3.8615664845173043e-07, |
| "logits/chosen": -3.2704148292541504, |
| "logits/rejected": -3.3333544731140137, |
| "logps/chosen": -0.4401400685310364, |
| "logps/rejected": -0.47681349515914917, |
| "loss": 1.2666, |
| "rewards/accuracies": 0.6302083730697632, |
| "rewards/chosen": -0.8802801370620728, |
| "rewards/margins": 0.07334680110216141, |
| "rewards/rejected": -0.9536269903182983, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.3182284191101088, |
| "grad_norm": 8.94642448425293, |
| "learning_rate": 3.8069216757741344e-07, |
| "logits/chosen": -3.1391327381134033, |
| "logits/rejected": -3.2130041122436523, |
| "logps/chosen": -0.405760794878006, |
| "logps/rejected": -0.4274853765964508, |
| "loss": 1.289, |
| "rewards/accuracies": 0.5208333730697632, |
| "rewards/chosen": -0.8115215301513672, |
| "rewards/margins": 0.043449223041534424, |
| "rewards/rejected": -0.8549707531929016, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.3280705351650606, |
| "grad_norm": 8.592528343200684, |
| "learning_rate": 3.752276867030965e-07, |
| "logits/chosen": -2.9488821029663086, |
| "logits/rejected": -2.944669485092163, |
| "logps/chosen": -0.40197545289993286, |
| "logps/rejected": -0.4468863308429718, |
| "loss": 1.2564, |
| "rewards/accuracies": 0.6458333730697632, |
| "rewards/chosen": -0.8039509654045105, |
| "rewards/margins": 0.08982174843549728, |
| "rewards/rejected": -0.8937727212905884, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.3379126512200123, |
| "grad_norm": 8.592171669006348, |
| "learning_rate": 3.697632058287796e-07, |
| "logits/chosen": -3.245591878890991, |
| "logits/rejected": -3.060716390609741, |
| "logps/chosen": -0.39691171050071716, |
| "logps/rejected": -0.45668381452560425, |
| "loss": 1.2334, |
| "rewards/accuracies": 0.6927083730697632, |
| "rewards/chosen": -0.7938233613967896, |
| "rewards/margins": 0.11954419314861298, |
| "rewards/rejected": -0.9133676290512085, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.3477547672749641, |
| "grad_norm": 8.549724578857422, |
| "learning_rate": 3.6429872495446264e-07, |
| "logits/chosen": -3.181145191192627, |
| "logits/rejected": -3.142042875289917, |
| "logps/chosen": -0.4377954304218292, |
| "logps/rejected": -0.48063966631889343, |
| "loss": 1.2598, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.8755908012390137, |
| "rewards/margins": 0.085688516497612, |
| "rewards/rejected": -0.9612793326377869, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.357596883329916, |
| "grad_norm": 9.25687026977539, |
| "learning_rate": 3.5883424408014566e-07, |
| "logits/chosen": -2.8549139499664307, |
| "logits/rejected": -2.8353519439697266, |
| "logps/chosen": -0.43043509125709534, |
| "logps/rejected": -0.47761547565460205, |
| "loss": 1.252, |
| "rewards/accuracies": 0.6041666865348816, |
| "rewards/chosen": -0.8608702421188354, |
| "rewards/margins": 0.09436076134443283, |
| "rewards/rejected": -0.9552309513092041, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.3674389993848677, |
| "grad_norm": 10.600887298583984, |
| "learning_rate": 3.533697632058288e-07, |
| "logits/chosen": -3.306717872619629, |
| "logits/rejected": -3.220644235610962, |
| "logps/chosen": -0.44229361414909363, |
| "logps/rejected": -0.4992312490940094, |
| "loss": 1.2405, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.884587287902832, |
| "rewards/margins": 0.11387524008750916, |
| "rewards/rejected": -0.9984624981880188, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.3772811154398195, |
| "grad_norm": 9.458456993103027, |
| "learning_rate": 3.4790528233151184e-07, |
| "logits/chosen": -3.274623394012451, |
| "logits/rejected": -3.216095447540283, |
| "logps/chosen": -0.42786625027656555, |
| "logps/rejected": -0.47491148114204407, |
| "loss": 1.2528, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.8557324409484863, |
| "rewards/margins": 0.0940905213356018, |
| "rewards/rejected": -0.9498229622840881, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.3871232314947715, |
| "grad_norm": 10.303235054016113, |
| "learning_rate": 3.424408014571949e-07, |
| "logits/chosen": -3.2499632835388184, |
| "logits/rejected": -3.265618324279785, |
| "logps/chosen": -0.44693538546562195, |
| "logps/rejected": -0.49852439761161804, |
| "loss": 1.2465, |
| "rewards/accuracies": 0.6458333730697632, |
| "rewards/chosen": -0.8938708305358887, |
| "rewards/margins": 0.103178009390831, |
| "rewards/rejected": -0.9970487952232361, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.3969653475497232, |
| "grad_norm": 11.34437370300293, |
| "learning_rate": 3.369763205828779e-07, |
| "logits/chosen": -3.083287239074707, |
| "logits/rejected": -2.965116500854492, |
| "logps/chosen": -0.4754757583141327, |
| "logps/rejected": -0.5378195643424988, |
| "loss": 1.234, |
| "rewards/accuracies": 0.5885417461395264, |
| "rewards/chosen": -0.9509515166282654, |
| "rewards/margins": 0.12468753010034561, |
| "rewards/rejected": -1.0756391286849976, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.406807463604675, |
| "grad_norm": 10.716517448425293, |
| "learning_rate": 3.31511839708561e-07, |
| "logits/chosen": -2.8256187438964844, |
| "logits/rejected": -2.988969326019287, |
| "logps/chosen": -0.46769338846206665, |
| "logps/rejected": -0.509919285774231, |
| "loss": 1.2604, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.9353867769241333, |
| "rewards/margins": 0.0844518169760704, |
| "rewards/rejected": -1.019838571548462, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.4166495796596268, |
| "grad_norm": 11.292407989501953, |
| "learning_rate": 3.2604735883424406e-07, |
| "logits/chosen": -3.2503647804260254, |
| "logits/rejected": -3.2309374809265137, |
| "logps/chosen": -0.47588497400283813, |
| "logps/rejected": -0.5366601943969727, |
| "loss": 1.2343, |
| "rewards/accuracies": 0.6614583730697632, |
| "rewards/chosen": -0.9517699480056763, |
| "rewards/margins": 0.12155050784349442, |
| "rewards/rejected": -1.0733203887939453, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.4264916957145786, |
| "grad_norm": 10.401873588562012, |
| "learning_rate": 3.205828779599271e-07, |
| "logits/chosen": -2.9665112495422363, |
| "logits/rejected": -3.125314950942993, |
| "logps/chosen": -0.46969038248062134, |
| "logps/rejected": -0.5004762411117554, |
| "loss": 1.2755, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -0.9393807649612427, |
| "rewards/margins": 0.0615716427564621, |
| "rewards/rejected": -1.0009524822235107, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.4363338117695306, |
| "grad_norm": 11.833475112915039, |
| "learning_rate": 3.151183970856102e-07, |
| "logits/chosen": -3.078540325164795, |
| "logits/rejected": -3.116511821746826, |
| "logps/chosen": -0.48638832569122314, |
| "logps/rejected": -0.5369706153869629, |
| "loss": 1.2495, |
| "rewards/accuracies": 0.6145833730697632, |
| "rewards/chosen": -0.9727766513824463, |
| "rewards/margins": 0.10116463154554367, |
| "rewards/rejected": -1.0739412307739258, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.4461759278244823, |
| "grad_norm": 9.525228500366211, |
| "learning_rate": 3.096539162112932e-07, |
| "logits/chosen": -3.2219226360321045, |
| "logits/rejected": -3.1760897636413574, |
| "logps/chosen": -0.4626237154006958, |
| "logps/rejected": -0.5069611072540283, |
| "loss": 1.2574, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -0.9252474308013916, |
| "rewards/margins": 0.08867473900318146, |
| "rewards/rejected": -1.0139222145080566, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.4560180438794341, |
| "grad_norm": 10.4345121383667, |
| "learning_rate": 3.041894353369763e-07, |
| "logits/chosen": -3.1432931423187256, |
| "logits/rejected": -3.2274186611175537, |
| "logps/chosen": -0.4711434245109558, |
| "logps/rejected": -0.5244415998458862, |
| "loss": 1.2465, |
| "rewards/accuracies": 0.5833333730697632, |
| "rewards/chosen": -0.9422869086265564, |
| "rewards/margins": 0.10659627616405487, |
| "rewards/rejected": -1.0488831996917725, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.465860159934386, |
| "grad_norm": 11.221490859985352, |
| "learning_rate": 2.987249544626594e-07, |
| "logits/chosen": -3.4779763221740723, |
| "logits/rejected": -3.4805781841278076, |
| "logps/chosen": -0.44780024886131287, |
| "logps/rejected": -0.4945845901966095, |
| "loss": 1.2547, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.8956004977226257, |
| "rewards/margins": 0.0935685932636261, |
| "rewards/rejected": -0.9891691207885742, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.4757022759893377, |
| "grad_norm": 11.709156036376953, |
| "learning_rate": 2.9326047358834246e-07, |
| "logits/chosen": -3.1414666175842285, |
| "logits/rejected": -3.1980159282684326, |
| "logps/chosen": -0.47789841890335083, |
| "logps/rejected": -0.5440502166748047, |
| "loss": 1.2274, |
| "rewards/accuracies": 0.6770833730697632, |
| "rewards/chosen": -0.9557968378067017, |
| "rewards/margins": 0.1323036104440689, |
| "rewards/rejected": -1.0881004333496094, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.4855443920442895, |
| "grad_norm": 10.401344299316406, |
| "learning_rate": 2.8779599271402547e-07, |
| "logits/chosen": -3.1782946586608887, |
| "logits/rejected": -3.184023857116699, |
| "logps/chosen": -0.46204572916030884, |
| "logps/rejected": -0.4961879849433899, |
| "loss": 1.2697, |
| "rewards/accuracies": 0.6145833730697632, |
| "rewards/chosen": -0.9240914583206177, |
| "rewards/margins": 0.06828457117080688, |
| "rewards/rejected": -0.9923759698867798, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.4953865080992412, |
| "grad_norm": 11.5233793258667, |
| "learning_rate": 2.8233151183970854e-07, |
| "logits/chosen": -3.2461302280426025, |
| "logits/rejected": -3.2496047019958496, |
| "logps/chosen": -0.4670412540435791, |
| "logps/rejected": -0.5199131965637207, |
| "loss": 1.2448, |
| "rewards/accuracies": 0.6354166865348816, |
| "rewards/chosen": -0.934082567691803, |
| "rewards/margins": 0.1057438999414444, |
| "rewards/rejected": -1.0398263931274414, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.505228624154193, |
| "grad_norm": 12.667001724243164, |
| "learning_rate": 2.768670309653916e-07, |
| "logits/chosen": -3.2025842666625977, |
| "logits/rejected": -3.323841094970703, |
| "logps/chosen": -0.49344322085380554, |
| "logps/rejected": -0.5365071296691895, |
| "loss": 1.2605, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.9868864417076111, |
| "rewards/margins": 0.0861278772354126, |
| "rewards/rejected": -1.073014259338379, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.515070740209145, |
| "grad_norm": 10.09226131439209, |
| "learning_rate": 2.7140255009107467e-07, |
| "logits/chosen": -3.169337749481201, |
| "logits/rejected": -3.1390461921691895, |
| "logps/chosen": -0.4897007346153259, |
| "logps/rejected": -0.5485378503799438, |
| "loss": 1.2382, |
| "rewards/accuracies": 0.6458333730697632, |
| "rewards/chosen": -0.9794015288352966, |
| "rewards/margins": 0.11767425388097763, |
| "rewards/rejected": -1.0970757007598877, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.5249128562640968, |
| "grad_norm": 11.221296310424805, |
| "learning_rate": 2.659380692167577e-07, |
| "logits/chosen": -3.3969221115112305, |
| "logits/rejected": -3.386396884918213, |
| "logps/chosen": -0.47860053181648254, |
| "logps/rejected": -0.5197103023529053, |
| "loss": 1.2622, |
| "rewards/accuracies": 0.5885417461395264, |
| "rewards/chosen": -0.9572010636329651, |
| "rewards/margins": 0.08221958577632904, |
| "rewards/rejected": -1.0394206047058105, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.5347549723190486, |
| "grad_norm": 11.274225234985352, |
| "learning_rate": 2.6047358834244075e-07, |
| "logits/chosen": -2.8993306159973145, |
| "logits/rejected": -2.910104513168335, |
| "logps/chosen": -0.49471789598464966, |
| "logps/rejected": -0.5440719723701477, |
| "loss": 1.2494, |
| "rewards/accuracies": 0.6197916865348816, |
| "rewards/chosen": -0.9894358515739441, |
| "rewards/margins": 0.0987081229686737, |
| "rewards/rejected": -1.0881439447402954, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.5445970883740006, |
| "grad_norm": 11.793699264526367, |
| "learning_rate": 2.5500910746812387e-07, |
| "logits/chosen": -3.160764694213867, |
| "logits/rejected": -3.1699819564819336, |
| "logps/chosen": -0.5097307562828064, |
| "logps/rejected": -0.556024432182312, |
| "loss": 1.2529, |
| "rewards/accuracies": 0.6510416865348816, |
| "rewards/chosen": -1.0194615125656128, |
| "rewards/margins": 0.09258747845888138, |
| "rewards/rejected": -1.112048864364624, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.5544392044289523, |
| "grad_norm": 10.38713264465332, |
| "learning_rate": 2.495446265938069e-07, |
| "logits/chosen": -3.261507749557495, |
| "logits/rejected": -3.2834768295288086, |
| "logps/chosen": -0.4616134762763977, |
| "logps/rejected": -0.5000269412994385, |
| "loss": 1.2651, |
| "rewards/accuracies": 0.6041667461395264, |
| "rewards/chosen": -0.9232269525527954, |
| "rewards/margins": 0.07682683318853378, |
| "rewards/rejected": -1.000053882598877, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.5642813204839041, |
| "grad_norm": 11.302956581115723, |
| "learning_rate": 2.4408014571949e-07, |
| "logits/chosen": -3.1922333240509033, |
| "logits/rejected": -3.187087059020996, |
| "logps/chosen": -0.5025749206542969, |
| "logps/rejected": -0.5681424736976624, |
| "loss": 1.2272, |
| "rewards/accuracies": 0.7135417461395264, |
| "rewards/chosen": -1.0051498413085938, |
| "rewards/margins": 0.1311352252960205, |
| "rewards/rejected": -1.1362850666046143, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.574123436538856, |
| "grad_norm": 11.857656478881836, |
| "learning_rate": 2.38615664845173e-07, |
| "logits/chosen": -3.1751718521118164, |
| "logits/rejected": -3.2345504760742188, |
| "logps/chosen": -0.4723814129829407, |
| "logps/rejected": -0.5178726315498352, |
| "loss": 1.2564, |
| "rewards/accuracies": 0.5833333730697632, |
| "rewards/chosen": -0.9447628259658813, |
| "rewards/margins": 0.09098244458436966, |
| "rewards/rejected": -1.0357452630996704, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.5839655525938077, |
| "grad_norm": 11.975550651550293, |
| "learning_rate": 2.3315118397085608e-07, |
| "logits/chosen": -3.2860124111175537, |
| "logits/rejected": -3.2100272178649902, |
| "logps/chosen": -0.5020599961280823, |
| "logps/rejected": -0.5415343046188354, |
| "loss": 1.2634, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -1.0041199922561646, |
| "rewards/margins": 0.07894869148731232, |
| "rewards/rejected": -1.083068609237671, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.5938076686487594, |
| "grad_norm": 10.477987289428711, |
| "learning_rate": 2.2768670309653915e-07, |
| "logits/chosen": -3.417156934738159, |
| "logits/rejected": -3.4442646503448486, |
| "logps/chosen": -0.4748501181602478, |
| "logps/rejected": -0.5157420039176941, |
| "loss": 1.2617, |
| "rewards/accuracies": 0.5572916865348816, |
| "rewards/chosen": -0.9497002959251404, |
| "rewards/margins": 0.08178383111953735, |
| "rewards/rejected": -1.0314841270446777, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.6036497847037112, |
| "grad_norm": 10.905505180358887, |
| "learning_rate": 2.222222222222222e-07, |
| "logits/chosen": -3.232025384902954, |
| "logits/rejected": -3.3174943923950195, |
| "logps/chosen": -0.48250630497932434, |
| "logps/rejected": -0.5197854042053223, |
| "loss": 1.2651, |
| "rewards/accuracies": 0.6041666865348816, |
| "rewards/chosen": -0.9650125503540039, |
| "rewards/margins": 0.07455817610025406, |
| "rewards/rejected": -1.039570689201355, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.613491900758663, |
| "grad_norm": 9.681890487670898, |
| "learning_rate": 2.1675774134790528e-07, |
| "logits/chosen": -3.3105580806732178, |
| "logits/rejected": -3.2920236587524414, |
| "logps/chosen": -0.480990469455719, |
| "logps/rejected": -0.5195989608764648, |
| "loss": 1.2647, |
| "rewards/accuracies": 0.5989583730697632, |
| "rewards/chosen": -0.961980938911438, |
| "rewards/margins": 0.07721701264381409, |
| "rewards/rejected": -1.0391979217529297, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.6233340168136148, |
| "grad_norm": 11.535338401794434, |
| "learning_rate": 2.1129326047358833e-07, |
| "logits/chosen": -3.3885207176208496, |
| "logits/rejected": -3.4516897201538086, |
| "logps/chosen": -0.4919550120830536, |
| "logps/rejected": -0.537667989730835, |
| "loss": 1.256, |
| "rewards/accuracies": 0.5520833730697632, |
| "rewards/chosen": -0.9839099645614624, |
| "rewards/margins": 0.09142596274614334, |
| "rewards/rejected": -1.07533597946167, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.6331761328685668, |
| "grad_norm": 11.257711410522461, |
| "learning_rate": 2.058287795992714e-07, |
| "logits/chosen": -3.418330669403076, |
| "logits/rejected": -3.4276938438415527, |
| "logps/chosen": -0.4669812321662903, |
| "logps/rejected": -0.5167567729949951, |
| "loss": 1.251, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -0.9339624643325806, |
| "rewards/margins": 0.09955108910799026, |
| "rewards/rejected": -1.0335136651992798, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.6430182489235186, |
| "grad_norm": 31.353042602539062, |
| "learning_rate": 2.0036429872495443e-07, |
| "logits/chosen": -3.2747483253479004, |
| "logits/rejected": -3.2000725269317627, |
| "logps/chosen": -0.49033480882644653, |
| "logps/rejected": -0.5444329380989075, |
| "loss": 1.2428, |
| "rewards/accuracies": 0.6406250596046448, |
| "rewards/chosen": -0.9806696176528931, |
| "rewards/margins": 0.10819630324840546, |
| "rewards/rejected": -1.0888659954071045, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.6528603649784703, |
| "grad_norm": 12.651705741882324, |
| "learning_rate": 1.9489981785063753e-07, |
| "logits/chosen": -2.9816644191741943, |
| "logits/rejected": -3.024106979370117, |
| "logps/chosen": -0.5482050180435181, |
| "logps/rejected": -0.5978180170059204, |
| "loss": 1.251, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -1.0964101552963257, |
| "rewards/margins": 0.09922590851783752, |
| "rewards/rejected": -1.1956360340118408, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.6627024810334223, |
| "grad_norm": 11.137748718261719, |
| "learning_rate": 1.894353369763206e-07, |
| "logits/chosen": -3.3126227855682373, |
| "logits/rejected": -3.354325532913208, |
| "logps/chosen": -0.4819042980670929, |
| "logps/rejected": -0.5410703420639038, |
| "loss": 1.2365, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.9638086557388306, |
| "rewards/margins": 0.11833213269710541, |
| "rewards/rejected": -1.0821408033370972, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.672544597088374, |
| "grad_norm": 11.545525550842285, |
| "learning_rate": 1.8397085610200363e-07, |
| "logits/chosen": -3.5107603073120117, |
| "logits/rejected": -3.5072875022888184, |
| "logps/chosen": -0.515521764755249, |
| "logps/rejected": -0.5749427080154419, |
| "loss": 1.2368, |
| "rewards/accuracies": 0.6822916865348816, |
| "rewards/chosen": -1.0310436487197876, |
| "rewards/margins": 0.11884190142154694, |
| "rewards/rejected": -1.1498854160308838, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.6823867131433259, |
| "grad_norm": 10.51858139038086, |
| "learning_rate": 1.785063752276867e-07, |
| "logits/chosen": -3.210937023162842, |
| "logits/rejected": -3.261808395385742, |
| "logps/chosen": -0.5181245803833008, |
| "logps/rejected": -0.5811792612075806, |
| "loss": 1.2335, |
| "rewards/accuracies": 0.6354166865348816, |
| "rewards/chosen": -1.0362491607666016, |
| "rewards/margins": 0.12610934674739838, |
| "rewards/rejected": -1.1623585224151611, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.6922288291982777, |
| "grad_norm": 12.993363380432129, |
| "learning_rate": 1.7304189435336974e-07, |
| "logits/chosen": -2.966552972793579, |
| "logits/rejected": -2.980912685394287, |
| "logps/chosen": -0.568572461605072, |
| "logps/rejected": -0.6211986541748047, |
| "loss": 1.2491, |
| "rewards/accuracies": 0.6302083730697632, |
| "rewards/chosen": -1.137144923210144, |
| "rewards/margins": 0.10525240004062653, |
| "rewards/rejected": -1.2423973083496094, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.7020709452532294, |
| "grad_norm": 10.730545043945312, |
| "learning_rate": 1.6757741347905283e-07, |
| "logits/chosen": -3.201765298843384, |
| "logits/rejected": -3.2614786624908447, |
| "logps/chosen": -0.49252548813819885, |
| "logps/rejected": -0.5455992221832275, |
| "loss": 1.2456, |
| "rewards/accuracies": 0.6302083730697632, |
| "rewards/chosen": -0.9850510358810425, |
| "rewards/margins": 0.10614749789237976, |
| "rewards/rejected": -1.091198444366455, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.7119130613081812, |
| "grad_norm": 12.557461738586426, |
| "learning_rate": 1.6211293260473587e-07, |
| "logits/chosen": -3.0382239818573, |
| "logits/rejected": -3.0993409156799316, |
| "logps/chosen": -0.5261319875717163, |
| "logps/rejected": -0.5957756042480469, |
| "loss": 1.2264, |
| "rewards/accuracies": 0.6614583730697632, |
| "rewards/chosen": -1.0522639751434326, |
| "rewards/margins": 0.13928718864917755, |
| "rewards/rejected": -1.1915510892868042, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.721755177363133, |
| "grad_norm": 12.95654296875, |
| "learning_rate": 1.5664845173041894e-07, |
| "logits/chosen": -3.4481818675994873, |
| "logits/rejected": -3.484841823577881, |
| "logps/chosen": -0.5029021501541138, |
| "logps/rejected": -0.5526931285858154, |
| "loss": 1.2491, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -1.0058043003082275, |
| "rewards/margins": 0.09958191215991974, |
| "rewards/rejected": -1.1053862571716309, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.7315972934180848, |
| "grad_norm": 13.081609725952148, |
| "learning_rate": 1.5118397085610198e-07, |
| "logits/chosen": -3.1820714473724365, |
| "logits/rejected": -3.1990368366241455, |
| "logps/chosen": -0.5167220830917358, |
| "logps/rejected": -0.5637269020080566, |
| "loss": 1.2527, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.0334441661834717, |
| "rewards/margins": 0.094009630382061, |
| "rewards/rejected": -1.1274538040161133, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.7414394094730365, |
| "grad_norm": 11.695033073425293, |
| "learning_rate": 1.4571948998178507e-07, |
| "logits/chosen": -3.054007053375244, |
| "logits/rejected": -3.153001308441162, |
| "logps/chosen": -0.5482903718948364, |
| "logps/rejected": -0.6053259372711182, |
| "loss": 1.2394, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.0965806245803833, |
| "rewards/margins": 0.11407110095024109, |
| "rewards/rejected": -1.2106518745422363, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.7512815255279885, |
| "grad_norm": 12.978899955749512, |
| "learning_rate": 1.402550091074681e-07, |
| "logits/chosen": -3.115644693374634, |
| "logits/rejected": -3.0674333572387695, |
| "logps/chosen": -0.5462538003921509, |
| "logps/rejected": -0.600370466709137, |
| "loss": 1.2462, |
| "rewards/accuracies": 0.5833333730697632, |
| "rewards/chosen": -1.0925076007843018, |
| "rewards/margins": 0.10823334753513336, |
| "rewards/rejected": -1.2007410526275635, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.7611236415829403, |
| "grad_norm": 11.404801368713379, |
| "learning_rate": 1.3479052823315118e-07, |
| "logits/chosen": -3.090611219406128, |
| "logits/rejected": -3.2334389686584473, |
| "logps/chosen": -0.5118279457092285, |
| "logps/rejected": -0.5650106072425842, |
| "loss": 1.2462, |
| "rewards/accuracies": 0.6041666865348816, |
| "rewards/chosen": -1.023655891418457, |
| "rewards/margins": 0.10636530816555023, |
| "rewards/rejected": -1.1300212144851685, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.770965757637892, |
| "grad_norm": 12.541303634643555, |
| "learning_rate": 1.2932604735883425e-07, |
| "logits/chosen": -3.310518264770508, |
| "logits/rejected": -3.390636444091797, |
| "logps/chosen": -0.525303840637207, |
| "logps/rejected": -0.5825842618942261, |
| "loss": 1.2412, |
| "rewards/accuracies": 0.6197916865348816, |
| "rewards/chosen": -1.050607681274414, |
| "rewards/margins": 0.11456100642681122, |
| "rewards/rejected": -1.1651686429977417, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.780807873692844, |
| "grad_norm": 14.414772987365723, |
| "learning_rate": 1.238615664845173e-07, |
| "logits/chosen": -2.993222713470459, |
| "logits/rejected": -3.050335168838501, |
| "logps/chosen": -0.5568434000015259, |
| "logps/rejected": -0.5955438613891602, |
| "loss": 1.2667, |
| "rewards/accuracies": 0.5625000596046448, |
| "rewards/chosen": -1.1136868000030518, |
| "rewards/margins": 0.07740084081888199, |
| "rewards/rejected": -1.1910877227783203, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.7906499897477959, |
| "grad_norm": 11.817216873168945, |
| "learning_rate": 1.1839708561020035e-07, |
| "logits/chosen": -3.2628841400146484, |
| "logits/rejected": -3.3687920570373535, |
| "logps/chosen": -0.5100204944610596, |
| "logps/rejected": -0.5642228722572327, |
| "loss": 1.2437, |
| "rewards/accuracies": 0.6614583730697632, |
| "rewards/chosen": -1.0200409889221191, |
| "rewards/margins": 0.10840488970279694, |
| "rewards/rejected": -1.1284458637237549, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.8004921058027477, |
| "grad_norm": 12.728991508483887, |
| "learning_rate": 1.1293260473588342e-07, |
| "logits/chosen": -3.1474390029907227, |
| "logits/rejected": -3.1595215797424316, |
| "logps/chosen": -0.5479636788368225, |
| "logps/rejected": -0.6197019815444946, |
| "loss": 1.2228, |
| "rewards/accuracies": 0.6354166865348816, |
| "rewards/chosen": -1.095927357673645, |
| "rewards/margins": 0.14347663521766663, |
| "rewards/rejected": -1.2394039630889893, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.8103342218576994, |
| "grad_norm": 12.561689376831055, |
| "learning_rate": 1.0746812386156647e-07, |
| "logits/chosen": -2.919260025024414, |
| "logits/rejected": -3.0359532833099365, |
| "logps/chosen": -0.5316007137298584, |
| "logps/rejected": -0.578840970993042, |
| "loss": 1.2546, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.0632013082504272, |
| "rewards/margins": 0.09448058158159256, |
| "rewards/rejected": -1.157681941986084, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.8201763379126512, |
| "grad_norm": 13.989924430847168, |
| "learning_rate": 1.0200364298724954e-07, |
| "logits/chosen": -3.202847957611084, |
| "logits/rejected": -3.3060855865478516, |
| "logps/chosen": -0.5514649748802185, |
| "logps/rejected": -0.6039638519287109, |
| "loss": 1.2515, |
| "rewards/accuracies": 0.5833333730697632, |
| "rewards/chosen": -1.102929949760437, |
| "rewards/margins": 0.10499779880046844, |
| "rewards/rejected": -1.2079277038574219, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.830018453967603, |
| "grad_norm": 13.695045471191406, |
| "learning_rate": 9.653916211293261e-08, |
| "logits/chosen": -2.9378960132598877, |
| "logits/rejected": -2.944990634918213, |
| "logps/chosen": -0.5494365692138672, |
| "logps/rejected": -0.6013669371604919, |
| "loss": 1.2493, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.0988731384277344, |
| "rewards/margins": 0.10386063158512115, |
| "rewards/rejected": -1.2027339935302734, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.8398605700225548, |
| "grad_norm": 14.38819694519043, |
| "learning_rate": 9.107468123861566e-08, |
| "logits/chosen": -2.811845302581787, |
| "logits/rejected": -2.8457605838775635, |
| "logps/chosen": -0.5353372097015381, |
| "logps/rejected": -0.5632250905036926, |
| "loss": 1.2837, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.0706744194030762, |
| "rewards/margins": 0.055775657296180725, |
| "rewards/rejected": -1.1264501810073853, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.8497026860775065, |
| "grad_norm": 11.79210376739502, |
| "learning_rate": 8.561020036429873e-08, |
| "logits/chosen": -3.3166909217834473, |
| "logits/rejected": -3.299351215362549, |
| "logps/chosen": -0.5189695358276367, |
| "logps/rejected": -0.572175145149231, |
| "loss": 1.2461, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -1.0379390716552734, |
| "rewards/margins": 0.10641118884086609, |
| "rewards/rejected": -1.1443501710891724, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.8595448021324583, |
| "grad_norm": 12.88967227935791, |
| "learning_rate": 8.014571948998178e-08, |
| "logits/chosen": -3.1258530616760254, |
| "logits/rejected": -3.1050872802734375, |
| "logps/chosen": -0.5324106216430664, |
| "logps/rejected": -0.5854768753051758, |
| "loss": 1.2475, |
| "rewards/accuracies": 0.6354166865348816, |
| "rewards/chosen": -1.0648212432861328, |
| "rewards/margins": 0.1061326190829277, |
| "rewards/rejected": -1.1709537506103516, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.8693869181874103, |
| "grad_norm": 13.419716835021973, |
| "learning_rate": 7.468123861566485e-08, |
| "logits/chosen": -2.8158140182495117, |
| "logits/rejected": -2.991899013519287, |
| "logps/chosen": -0.5541937947273254, |
| "logps/rejected": -0.5925447344779968, |
| "loss": 1.2686, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.1083875894546509, |
| "rewards/margins": 0.07670189440250397, |
| "rewards/rejected": -1.1850894689559937, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.879229034242362, |
| "grad_norm": 13.015626907348633, |
| "learning_rate": 6.92167577413479e-08, |
| "logits/chosen": -3.060314655303955, |
| "logits/rejected": -3.1021711826324463, |
| "logps/chosen": -0.5426675081253052, |
| "logps/rejected": -0.5891798734664917, |
| "loss": 1.2529, |
| "rewards/accuracies": 0.6302083730697632, |
| "rewards/chosen": -1.0853350162506104, |
| "rewards/margins": 0.09302478283643723, |
| "rewards/rejected": -1.1783597469329834, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.8890711502973139, |
| "grad_norm": 12.878572463989258, |
| "learning_rate": 6.375227686703097e-08, |
| "logits/chosen": -3.18039870262146, |
| "logits/rejected": -3.163947343826294, |
| "logps/chosen": -0.5458322167396545, |
| "logps/rejected": -0.5959421396255493, |
| "loss": 1.252, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -1.0916643142700195, |
| "rewards/margins": 0.1002199798822403, |
| "rewards/rejected": -1.1918842792510986, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.8989132663522659, |
| "grad_norm": 12.223690032958984, |
| "learning_rate": 5.828779599271402e-08, |
| "logits/chosen": -3.4556312561035156, |
| "logits/rejected": -3.4509153366088867, |
| "logps/chosen": -0.5170646905899048, |
| "logps/rejected": -0.5667902231216431, |
| "loss": 1.2527, |
| "rewards/accuracies": 0.6197916865348816, |
| "rewards/chosen": -1.0341295003890991, |
| "rewards/margins": 0.09945093840360641, |
| "rewards/rejected": -1.1335804462432861, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.9087553824072176, |
| "grad_norm": 11.406511306762695, |
| "learning_rate": 5.282331511839708e-08, |
| "logits/chosen": -3.1296825408935547, |
| "logits/rejected": -3.053649425506592, |
| "logps/chosen": -0.4993407130241394, |
| "logps/rejected": -0.557974636554718, |
| "loss": 1.2393, |
| "rewards/accuracies": 0.6145833730697632, |
| "rewards/chosen": -0.9986814260482788, |
| "rewards/margins": 0.11726780235767365, |
| "rewards/rejected": -1.115949273109436, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.9185974984621694, |
| "grad_norm": 12.973370552062988, |
| "learning_rate": 4.735883424408015e-08, |
| "logits/chosen": -2.872734785079956, |
| "logits/rejected": -2.9354488849639893, |
| "logps/chosen": -0.5702813863754272, |
| "logps/rejected": -0.6181797981262207, |
| "loss": 1.255, |
| "rewards/accuracies": 0.6197916865348816, |
| "rewards/chosen": -1.1405627727508545, |
| "rewards/margins": 0.0957968458533287, |
| "rewards/rejected": -1.2363595962524414, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.9284396145171212, |
| "grad_norm": 10.770984649658203, |
| "learning_rate": 4.189435336976321e-08, |
| "logits/chosen": -3.0323398113250732, |
| "logits/rejected": -3.084712266921997, |
| "logps/chosen": -0.5010161995887756, |
| "logps/rejected": -0.5550758242607117, |
| "loss": 1.2431, |
| "rewards/accuracies": 0.6197916865348816, |
| "rewards/chosen": -1.0020323991775513, |
| "rewards/margins": 0.10811936110258102, |
| "rewards/rejected": -1.110151767730713, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.938281730572073, |
| "grad_norm": 13.19275951385498, |
| "learning_rate": 3.642987249544627e-08, |
| "logits/chosen": -3.0209498405456543, |
| "logits/rejected": -3.197568893432617, |
| "logps/chosen": -0.5342345237731934, |
| "logps/rejected": -0.5740767121315002, |
| "loss": 1.2665, |
| "rewards/accuracies": 0.6093750596046448, |
| "rewards/chosen": -1.0684690475463867, |
| "rewards/margins": 0.07968436926603317, |
| "rewards/rejected": -1.148153305053711, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.9481238466270248, |
| "grad_norm": 13.59703540802002, |
| "learning_rate": 3.096539162112933e-08, |
| "logits/chosen": -3.445460319519043, |
| "logits/rejected": -3.4261269569396973, |
| "logps/chosen": -0.5216549634933472, |
| "logps/rejected": -0.5718774795532227, |
| "loss": 1.2519, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -1.0433099269866943, |
| "rewards/margins": 0.10044509172439575, |
| "rewards/rejected": -1.1437549591064453, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.9579659626819765, |
| "grad_norm": 13.935587882995605, |
| "learning_rate": 2.5500910746812385e-08, |
| "logits/chosen": -3.1474225521087646, |
| "logits/rejected": -3.192647933959961, |
| "logps/chosen": -0.5525200366973877, |
| "logps/rejected": -0.6108919382095337, |
| "loss": 1.2421, |
| "rewards/accuracies": 0.5885416865348816, |
| "rewards/chosen": -1.1050399541854858, |
| "rewards/margins": 0.1167440339922905, |
| "rewards/rejected": -1.2217838764190674, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.9678080787369283, |
| "grad_norm": 12.028646469116211, |
| "learning_rate": 2.0036429872495445e-08, |
| "logits/chosen": -2.8930859565734863, |
| "logits/rejected": -3.0996487140655518, |
| "logps/chosen": -0.5432897210121155, |
| "logps/rejected": -0.6004898548126221, |
| "loss": 1.2421, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -1.086579442024231, |
| "rewards/margins": 0.11440026760101318, |
| "rewards/rejected": -1.2009797096252441, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.97765019479188, |
| "grad_norm": 12.551324844360352, |
| "learning_rate": 1.4571948998178505e-08, |
| "logits/chosen": -2.6679749488830566, |
| "logits/rejected": -2.9275968074798584, |
| "logps/chosen": -0.5680824518203735, |
| "logps/rejected": -0.6121156215667725, |
| "loss": 1.2617, |
| "rewards/accuracies": 0.5989583730697632, |
| "rewards/chosen": -1.136164903640747, |
| "rewards/margins": 0.08806635439395905, |
| "rewards/rejected": -1.224231243133545, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.987492310846832, |
| "grad_norm": 13.448480606079102, |
| "learning_rate": 9.107468123861567e-09, |
| "logits/chosen": -2.95973801612854, |
| "logits/rejected": -3.03635835647583, |
| "logps/chosen": -0.5279879570007324, |
| "logps/rejected": -0.5912687182426453, |
| "loss": 1.2317, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.0559759140014648, |
| "rewards/margins": 0.1265614926815033, |
| "rewards/rejected": -1.1825374364852905, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.9973344269017839, |
| "grad_norm": 12.023303985595703, |
| "learning_rate": 3.6429872495446263e-09, |
| "logits/chosen": -2.846055746078491, |
| "logits/rejected": -2.954606533050537, |
| "logps/chosen": -0.5345979332923889, |
| "logps/rejected": -0.5786259770393372, |
| "loss": 1.2608, |
| "rewards/accuracies": 0.5989583730697632, |
| "rewards/chosen": -1.0691958665847778, |
| "rewards/margins": 0.08805612474679947, |
| "rewards/rejected": -1.1572520732879639, |
| "step": 609 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 610, |
| "total_flos": 174251037229056.0, |
| "train_loss": 1.2841152152077095, |
| "train_runtime": 12715.4202, |
| "train_samples_per_second": 3.068, |
| "train_steps_per_second": 0.048 |
| } |
| ], |
| "logging_steps": 3, |
| "max_steps": 610, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 250, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 174251037229056.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|