| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04, |
| "grad_norm": 8.528485298156738, |
| "learning_rate": 9e-06, |
| "logits/chosen": 2.054985523223877, |
| "logits/rejected": 1.9033839702606201, |
| "logps/chosen": -23.966228485107422, |
| "logps/rejected": -53.68938446044922, |
| "loss": 0.5192, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": 3.1913700103759766, |
| "rewards/margins": 0.7934623956680298, |
| "rewards/rejected": 2.3979074954986572, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.4699181616306305, |
| "learning_rate": 1.9e-05, |
| "logits/chosen": 2.652074098587036, |
| "logits/rejected": 2.6142349243164062, |
| "logps/chosen": -19.591583251953125, |
| "logps/rejected": -71.51148986816406, |
| "loss": 0.2992, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 3.1300272941589355, |
| "rewards/margins": 2.6587636470794678, |
| "rewards/rejected": 0.47126370668411255, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 6.240016937255859, |
| "learning_rate": 2.9e-05, |
| "logits/chosen": 3.368675947189331, |
| "logits/rejected": 3.328001022338867, |
| "logps/chosen": -21.065635681152344, |
| "logps/rejected": -97.60667419433594, |
| "loss": 0.2464, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 3.2080636024475098, |
| "rewards/margins": 5.249109745025635, |
| "rewards/rejected": -2.041045904159546, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.10691515356302261, |
| "learning_rate": 3.9000000000000006e-05, |
| "logits/chosen": 2.1906957626342773, |
| "logits/rejected": 2.274221658706665, |
| "logps/chosen": -17.63897132873535, |
| "logps/rejected": -90.91486358642578, |
| "loss": 0.2946, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 3.081021785736084, |
| "rewards/margins": 5.548964500427246, |
| "rewards/rejected": -2.467942714691162, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.6372888088226318, |
| "learning_rate": 4.9e-05, |
| "logits/chosen": 0.6477093696594238, |
| "logits/rejected": 0.833999752998352, |
| "logps/chosen": -29.2796630859375, |
| "logps/rejected": -124.37042236328125, |
| "loss": 0.2171, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 2.3345980644226074, |
| "rewards/margins": 7.417657375335693, |
| "rewards/rejected": -5.083059310913086, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.571674108505249, |
| "learning_rate": 4.9e-05, |
| "logits/chosen": 2.270029067993164, |
| "logits/rejected": 2.524890899658203, |
| "logps/chosen": -27.73245620727539, |
| "logps/rejected": -103.86787414550781, |
| "loss": 0.2655, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 2.1670007705688477, |
| "rewards/margins": 5.720990180969238, |
| "rewards/rejected": -3.5539894104003906, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.6424742937088013, |
| "learning_rate": 4.7888888888888886e-05, |
| "logits/chosen": 1.0871316194534302, |
| "logits/rejected": 1.31881582736969, |
| "logps/chosen": -35.905784606933594, |
| "logps/rejected": -127.39874267578125, |
| "loss": 0.2822, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 2.1068429946899414, |
| "rewards/margins": 7.130377769470215, |
| "rewards/rejected": -5.02353572845459, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.2139262557029724, |
| "learning_rate": 4.677777777777778e-05, |
| "logits/chosen": 1.289627194404602, |
| "logits/rejected": 1.5509237051010132, |
| "logps/chosen": -37.245338439941406, |
| "logps/rejected": -143.37571716308594, |
| "loss": 0.2551, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 1.7066528797149658, |
| "rewards/margins": 8.251852989196777, |
| "rewards/rejected": -6.545199394226074, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 9.060225486755371, |
| "learning_rate": 4.566666666666667e-05, |
| "logits/chosen": -0.21008212864398956, |
| "logits/rejected": 0.13004381954669952, |
| "logps/chosen": -34.545066833496094, |
| "logps/rejected": -136.86561584472656, |
| "loss": 0.2398, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 2.3621673583984375, |
| "rewards/margins": 8.225154876708984, |
| "rewards/rejected": -5.862987041473389, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.8874521255493164, |
| "learning_rate": 4.4555555555555555e-05, |
| "logits/chosen": 0.8144568204879761, |
| "logits/rejected": 1.1611213684082031, |
| "logps/chosen": -41.461158752441406, |
| "logps/rejected": -139.0437469482422, |
| "loss": 0.2243, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.0751674175262451, |
| "rewards/margins": 7.963423728942871, |
| "rewards/rejected": -6.8882575035095215, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.21037925779819489, |
| "learning_rate": 4.344444444444445e-05, |
| "logits/chosen": -0.559118390083313, |
| "logits/rejected": -0.11641822755336761, |
| "logps/chosen": -47.142601013183594, |
| "logps/rejected": -150.99496459960938, |
| "loss": 0.2594, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": 0.32266765832901, |
| "rewards/margins": 8.18213939666748, |
| "rewards/rejected": -7.859471321105957, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.33475127816200256, |
| "learning_rate": 4.233333333333334e-05, |
| "logits/chosen": -0.39622828364372253, |
| "logits/rejected": -0.1825840175151825, |
| "logps/chosen": -46.026100158691406, |
| "logps/rejected": -142.27127075195312, |
| "loss": 0.2442, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 0.987580418586731, |
| "rewards/margins": 7.525818824768066, |
| "rewards/rejected": -6.538238525390625, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.12689562141895294, |
| "learning_rate": 4.1222222222222224e-05, |
| "logits/chosen": -0.23490352928638458, |
| "logits/rejected": 0.27892929315567017, |
| "logps/chosen": -33.58977508544922, |
| "logps/rejected": -144.8148956298828, |
| "loss": 0.2096, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.9526748657226562, |
| "rewards/margins": 8.64609146118164, |
| "rewards/rejected": -6.693416595458984, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.12881708145141602, |
| "learning_rate": 4.011111111111111e-05, |
| "logits/chosen": -1.1939542293548584, |
| "logits/rejected": -0.9672979116439819, |
| "logps/chosen": -37.64386749267578, |
| "logps/rejected": -125.4320068359375, |
| "loss": 0.2528, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 2.0000433921813965, |
| "rewards/margins": 6.926792144775391, |
| "rewards/rejected": -4.926749229431152, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.21873943507671356, |
| "learning_rate": 3.9000000000000006e-05, |
| "logits/chosen": -1.780418038368225, |
| "logits/rejected": -1.4915210008621216, |
| "logps/chosen": -35.17759704589844, |
| "logps/rejected": -136.65286254882812, |
| "loss": 0.2278, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 1.875436544418335, |
| "rewards/margins": 8.086755752563477, |
| "rewards/rejected": -6.211319923400879, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.053676556795835495, |
| "learning_rate": 3.7888888888888894e-05, |
| "logits/chosen": -1.770747423171997, |
| "logits/rejected": -1.2938441038131714, |
| "logps/chosen": -29.032825469970703, |
| "logps/rejected": -138.14309692382812, |
| "loss": 0.2089, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 2.1548972129821777, |
| "rewards/margins": 8.57084846496582, |
| "rewards/rejected": -6.41594934463501, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.31680184602737427, |
| "learning_rate": 3.677777777777778e-05, |
| "logits/chosen": -1.9017817974090576, |
| "logits/rejected": -1.6287091970443726, |
| "logps/chosen": -32.30609893798828, |
| "logps/rejected": -135.4978485107422, |
| "loss": 0.1837, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 2.9330215454101562, |
| "rewards/margins": 8.456713676452637, |
| "rewards/rejected": -5.5236921310424805, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.12343556433916092, |
| "learning_rate": 3.566666666666667e-05, |
| "logits/chosen": -2.0771210193634033, |
| "logits/rejected": -1.8654956817626953, |
| "logps/chosen": -38.074161529541016, |
| "logps/rejected": -140.7716522216797, |
| "loss": 0.2189, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.6962649822235107, |
| "rewards/margins": 8.185938835144043, |
| "rewards/rejected": -6.4896745681762695, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.16018041968345642, |
| "learning_rate": 3.4555555555555556e-05, |
| "logits/chosen": -2.5613696575164795, |
| "logits/rejected": -2.160233736038208, |
| "logps/chosen": -35.08837127685547, |
| "logps/rejected": -174.58566284179688, |
| "loss": 0.1491, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 1.949218511581421, |
| "rewards/margins": 11.258779525756836, |
| "rewards/rejected": -9.309560775756836, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 2.166445016860962, |
| "learning_rate": 3.3444444444444443e-05, |
| "logits/chosen": -3.0443778038024902, |
| "logits/rejected": -2.7526583671569824, |
| "logps/chosen": -40.106178283691406, |
| "logps/rejected": -151.62503051757812, |
| "loss": 0.2624, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.3750978708267212, |
| "rewards/margins": 9.279211044311523, |
| "rewards/rejected": -7.904112339019775, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.3322606384754181, |
| "learning_rate": 3.233333333333333e-05, |
| "logits/chosen": -2.9914917945861816, |
| "logits/rejected": -2.5901548862457275, |
| "logps/chosen": -43.788307189941406, |
| "logps/rejected": -210.02102661132812, |
| "loss": 0.2258, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.1431424617767334, |
| "rewards/margins": 13.883976936340332, |
| "rewards/rejected": -12.740835189819336, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.2920965850353241, |
| "learning_rate": 3.1222222222222225e-05, |
| "logits/chosen": -2.3677401542663574, |
| "logits/rejected": -1.9734885692596436, |
| "logps/chosen": -36.96697235107422, |
| "logps/rejected": -187.7698211669922, |
| "loss": 0.2081, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 1.8709278106689453, |
| "rewards/margins": 12.740228652954102, |
| "rewards/rejected": -10.86929988861084, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.15863250195980072, |
| "learning_rate": 3.0111111111111113e-05, |
| "logits/chosen": -1.819131851196289, |
| "logits/rejected": -1.4921624660491943, |
| "logps/chosen": -41.595420837402344, |
| "logps/rejected": -186.645751953125, |
| "loss": 0.1995, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 0.9655638933181763, |
| "rewards/margins": 12.226719856262207, |
| "rewards/rejected": -11.26115608215332, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.0697341114282608, |
| "learning_rate": 2.9e-05, |
| "logits/chosen": -2.357139825820923, |
| "logits/rejected": -1.888144850730896, |
| "logps/chosen": -52.9119987487793, |
| "logps/rejected": -263.25390625, |
| "loss": 0.1824, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 0.7968165874481201, |
| "rewards/margins": 18.26348304748535, |
| "rewards/rejected": -17.466665267944336, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.8565608859062195, |
| "learning_rate": 2.788888888888889e-05, |
| "logits/chosen": -2.9249751567840576, |
| "logits/rejected": -2.6594655513763428, |
| "logps/chosen": -49.89072799682617, |
| "logps/rejected": -190.1117401123047, |
| "loss": 0.2175, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.047927260398864746, |
| "rewards/margins": 11.593302726745605, |
| "rewards/rejected": -11.54537582397461, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 0.4322703778743744, |
| "learning_rate": 2.677777777777778e-05, |
| "logits/chosen": -2.591165781021118, |
| "logits/rejected": -2.2559447288513184, |
| "logps/chosen": -42.47401428222656, |
| "logps/rejected": -188.3767547607422, |
| "loss": 0.2081, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.9701582789421082, |
| "rewards/margins": 12.480576515197754, |
| "rewards/rejected": -11.510419845581055, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 0.19003595411777496, |
| "learning_rate": 2.5666666666666666e-05, |
| "logits/chosen": -3.374302625656128, |
| "logits/rejected": -3.0488531589508057, |
| "logps/chosen": -57.8838996887207, |
| "logps/rejected": -193.90777587890625, |
| "loss": 0.3035, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.37895283102989197, |
| "rewards/margins": 11.51020622253418, |
| "rewards/rejected": -11.889159202575684, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.16604876518249512, |
| "learning_rate": 2.4555555555555557e-05, |
| "logits/chosen": -3.1895713806152344, |
| "logits/rejected": -2.834735870361328, |
| "logps/chosen": -52.5036735534668, |
| "logps/rejected": -177.6607666015625, |
| "loss": 0.2516, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": 0.857752799987793, |
| "rewards/margins": 10.699987411499023, |
| "rewards/rejected": -9.84223461151123, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 0.14234374463558197, |
| "learning_rate": 2.3444444444444448e-05, |
| "logits/chosen": -3.344165086746216, |
| "logits/rejected": -3.020646572113037, |
| "logps/chosen": -41.34022903442383, |
| "logps/rejected": -178.89938354492188, |
| "loss": 0.2602, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": 1.0710757970809937, |
| "rewards/margins": 11.487492561340332, |
| "rewards/rejected": -10.416417121887207, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.1858437955379486, |
| "learning_rate": 2.2333333333333335e-05, |
| "logits/chosen": -3.0915908813476562, |
| "logits/rejected": -2.762167453765869, |
| "logps/chosen": -41.24951171875, |
| "logps/rejected": -172.49880981445312, |
| "loss": 0.2256, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.1378252506256104, |
| "rewards/margins": 10.839186668395996, |
| "rewards/rejected": -9.701360702514648, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 0.2519996762275696, |
| "learning_rate": 2.1222222222222223e-05, |
| "logits/chosen": -3.076692581176758, |
| "logits/rejected": -2.7446165084838867, |
| "logps/chosen": -50.308746337890625, |
| "logps/rejected": -204.6868896484375, |
| "loss": 0.165, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 0.4235803484916687, |
| "rewards/margins": 13.201803207397461, |
| "rewards/rejected": -12.778223037719727, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 0.3874475657939911, |
| "learning_rate": 2.011111111111111e-05, |
| "logits/chosen": -3.646862030029297, |
| "logits/rejected": -3.424621105194092, |
| "logps/chosen": -52.49565505981445, |
| "logps/rejected": -167.20236206054688, |
| "loss": 0.2347, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.23652243614196777, |
| "rewards/margins": 9.693285942077637, |
| "rewards/rejected": -9.456764221191406, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 0.2814558148384094, |
| "learning_rate": 1.9e-05, |
| "logits/chosen": -3.1871893405914307, |
| "logits/rejected": -2.7857449054718018, |
| "logps/chosen": -50.25436782836914, |
| "logps/rejected": -183.965087890625, |
| "loss": 0.269, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.5717954039573669, |
| "rewards/margins": 11.194442749023438, |
| "rewards/rejected": -10.622648239135742, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.3599999999999999, |
| "grad_norm": 0.2564876973628998, |
| "learning_rate": 1.788888888888889e-05, |
| "logits/chosen": -3.328599452972412, |
| "logits/rejected": -2.8498024940490723, |
| "logps/chosen": -40.71959686279297, |
| "logps/rejected": -194.56871032714844, |
| "loss": 0.1908, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 1.3430808782577515, |
| "rewards/margins": 13.080599784851074, |
| "rewards/rejected": -11.737520217895508, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.2628025412559509, |
| "learning_rate": 1.677777777777778e-05, |
| "logits/chosen": -4.046440124511719, |
| "logits/rejected": -3.7269904613494873, |
| "logps/chosen": -46.230613708496094, |
| "logps/rejected": -186.4309539794922, |
| "loss": 0.2016, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 0.4831501543521881, |
| "rewards/margins": 11.869375228881836, |
| "rewards/rejected": -11.386224746704102, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 0.22657158970832825, |
| "learning_rate": 1.5666666666666667e-05, |
| "logits/chosen": -3.75866961479187, |
| "logits/rejected": -3.361738681793213, |
| "logps/chosen": -49.731544494628906, |
| "logps/rejected": -203.48196411132812, |
| "loss": 0.165, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 0.7678171992301941, |
| "rewards/margins": 12.776975631713867, |
| "rewards/rejected": -12.009159088134766, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.2632221281528473, |
| "learning_rate": 1.4555555555555556e-05, |
| "logits/chosen": -3.995814561843872, |
| "logits/rejected": -3.644721508026123, |
| "logps/chosen": -48.3692626953125, |
| "logps/rejected": -201.17593383789062, |
| "loss": 0.1649, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 0.5727468729019165, |
| "rewards/margins": 12.856643676757812, |
| "rewards/rejected": -12.283895492553711, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 0.1788269430398941, |
| "learning_rate": 1.3444444444444445e-05, |
| "logits/chosen": -3.70817494392395, |
| "logits/rejected": -3.3923275470733643, |
| "logps/chosen": -40.871490478515625, |
| "logps/rejected": -171.85128784179688, |
| "loss": 0.2101, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.9809708595275879, |
| "rewards/margins": 11.068758010864258, |
| "rewards/rejected": -10.087786674499512, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 0.14607764780521393, |
| "learning_rate": 1.2333333333333334e-05, |
| "logits/chosen": -3.0384268760681152, |
| "logits/rejected": -2.7124972343444824, |
| "logps/chosen": -36.8101806640625, |
| "logps/rejected": -163.4900360107422, |
| "loss": 0.26, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 1.447727084159851, |
| "rewards/margins": 10.76397705078125, |
| "rewards/rejected": -9.316250801086426, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.21636660397052765, |
| "learning_rate": 1.1222222222222224e-05, |
| "logits/chosen": -3.6440536975860596, |
| "logits/rejected": -3.278002977371216, |
| "logps/chosen": -41.639305114746094, |
| "logps/rejected": -200.2166290283203, |
| "loss": 0.1822, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 1.6405818462371826, |
| "rewards/margins": 13.554621696472168, |
| "rewards/rejected": -11.914040565490723, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.6400000000000001, |
| "grad_norm": 0.11900901794433594, |
| "learning_rate": 1.0111111111111111e-05, |
| "logits/chosen": -3.0452122688293457, |
| "logits/rejected": -2.5407042503356934, |
| "logps/chosen": -37.94886016845703, |
| "logps/rejected": -153.2200469970703, |
| "loss": 0.2347, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 1.7741081714630127, |
| "rewards/margins": 9.864465713500977, |
| "rewards/rejected": -8.090356826782227, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.6800000000000002, |
| "grad_norm": 0.05728014558553696, |
| "learning_rate": 9e-06, |
| "logits/chosen": -3.795870304107666, |
| "logits/rejected": -3.469318389892578, |
| "logps/chosen": -34.80308151245117, |
| "logps/rejected": -195.09146118164062, |
| "loss": 0.1996, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.8775447607040405, |
| "rewards/margins": 13.589404106140137, |
| "rewards/rejected": -11.711858749389648, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 0.153469517827034, |
| "learning_rate": 7.88888888888889e-06, |
| "logits/chosen": -3.887744426727295, |
| "logits/rejected": -3.5376434326171875, |
| "logps/chosen": -34.76511001586914, |
| "logps/rejected": -146.95310974121094, |
| "loss": 0.2602, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": 1.5828745365142822, |
| "rewards/margins": 9.252839088439941, |
| "rewards/rejected": -7.669964790344238, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 0.16263523697853088, |
| "learning_rate": 6.777777777777779e-06, |
| "logits/chosen": -3.6285464763641357, |
| "logits/rejected": -3.208299160003662, |
| "logps/chosen": -29.505624771118164, |
| "logps/rejected": -147.29527282714844, |
| "loss": 0.269, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 2.1386911869049072, |
| "rewards/margins": 9.6033296585083, |
| "rewards/rejected": -7.464638710021973, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.01779480278491974, |
| "learning_rate": 5.666666666666667e-06, |
| "logits/chosen": -3.886363983154297, |
| "logits/rejected": -3.3980984687805176, |
| "logps/chosen": -37.660362243652344, |
| "logps/rejected": -191.5253143310547, |
| "loss": 0.2082, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 1.585105538368225, |
| "rewards/margins": 12.881246566772461, |
| "rewards/rejected": -11.296142578125, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.8399999999999999, |
| "grad_norm": 0.21525858342647552, |
| "learning_rate": 4.555555555555556e-06, |
| "logits/chosen": -3.215524673461914, |
| "logits/rejected": -2.8312759399414062, |
| "logps/chosen": -39.14852523803711, |
| "logps/rejected": -190.01809692382812, |
| "loss": 0.165, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 1.5289268493652344, |
| "rewards/margins": 12.520524978637695, |
| "rewards/rejected": -10.991598129272461, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 0.19355317950248718, |
| "learning_rate": 3.4444444444444444e-06, |
| "logits/chosen": -3.51098895072937, |
| "logits/rejected": -3.231642484664917, |
| "logps/chosen": -39.47585678100586, |
| "logps/rejected": -193.40306091308594, |
| "loss": 0.2182, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 1.1096737384796143, |
| "rewards/margins": 12.903974533081055, |
| "rewards/rejected": -11.79430103302002, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 0.20150351524353027, |
| "learning_rate": 2.3333333333333336e-06, |
| "logits/chosen": -3.484516143798828, |
| "logits/rejected": -3.1024327278137207, |
| "logps/chosen": -42.00218963623047, |
| "logps/rejected": -183.51300048828125, |
| "loss": 0.1822, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.525578260421753, |
| "rewards/margins": 11.946281433105469, |
| "rewards/rejected": -10.420703887939453, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 0.011142472736537457, |
| "learning_rate": 1.2222222222222223e-06, |
| "logits/chosen": -3.6615467071533203, |
| "logits/rejected": -3.2794384956359863, |
| "logps/chosen": -41.063148498535156, |
| "logps/rejected": -208.94747924804688, |
| "loss": 0.1647, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 1.4969767332077026, |
| "rewards/margins": 14.444828987121582, |
| "rewards/rejected": -12.947853088378906, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.214690163731575, |
| "learning_rate": 1.1111111111111112e-07, |
| "logits/chosen": -3.919487714767456, |
| "logits/rejected": -3.6030335426330566, |
| "logps/chosen": -34.534332275390625, |
| "logps/rejected": -180.12123107910156, |
| "loss": 0.2348, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 2.082820177078247, |
| "rewards/margins": 12.23314094543457, |
| "rewards/rejected": -10.150321006774902, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|